r109800 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: r109799 | r109800 | r109801 >
Date: 02:39, 23 January 2012
Author: oren
Status: deferred
Tags:
Comment:
eclipse organize imports + reformatted source
Modified paths:
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
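This revision is a formatting-only pass over WikiQueryParser.java: Eclipse's Organize Imports moves the BooleanClause.Occur import into alphabetical position, and the code formatter normalizes whitespace (spaces around operators and after commas, a space after control-flow keywords), wraps long lines at roughly 80 columns, and expands one-line enum declarations onto multiple lines. No functional changes are intended. As a rough, hypothetical sketch of the conventions the formatter applies (the class below is illustrative only and is not part of the commit):

    // Hypothetical illustration of the Eclipse formatter style applied in this commit;
    // the "before" forms are shown in comments.
    public class FormatterStyleExample {
        // Before: private final char[] buffer = new char[MAX_TERM_LEN+1];
        private static final int MAX_TERM_LEN = 255;
        private final char[] buffer = new char[MAX_TERM_LEN + 1];

        // Before: for(int i=0;i<words.length;i++) sum+=words[i].length();
        int totalLength(String[] words) {
            int sum = 0;
            for (int i = 0; i < words.length; i++) {
                sum += words[i].length();
            }
            return sum;
        }
    }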

Diff

Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -21,6 +21,7 @@
2222 import org.apache.lucene.search.ArticleQueryWrap;
2323 import org.apache.lucene.search.ArticleScaling;
2424 import org.apache.lucene.search.BooleanClause;
 25+import org.apache.lucene.search.BooleanClause.Occur;
2526 import org.apache.lucene.search.BooleanQuery;
2627 import org.apache.lucene.search.MultiPhraseQuery;
2728 import org.apache.lucene.search.PositionalMultiQuery;
@@ -29,7 +30,6 @@
3031 import org.apache.lucene.search.Query;
3132 import org.apache.lucene.search.RelevanceQuery;
3233 import org.apache.lucene.search.TermQuery;
33 -import org.apache.lucene.search.BooleanClause.Occur;
3434 import org.wikimedia.lsearch.config.GlobalConfiguration;
3535 import org.wikimedia.lsearch.config.IndexId;
3636 import org.wikimedia.lsearch.config.IndexId.AgeScaling;
@@ -44,13 +44,12 @@
4545 * Parser for wiki query syntax
4646 *
4747 * @author rainman
48 - *
4948 */
5049 public class WikiQueryParser {
5150 private static final int MAX_TERM_LEN = 255;
52 - private final char[] buffer = new char[MAX_TERM_LEN+1];
 51+ private final char[] buffer = new char[MAX_TERM_LEN + 1];
5352 private int length; // length of the token in the buffer
54 - private Analyzer analyzer;
 53+ private Analyzer analyzer;
5554 private char[] text; // text that is being parsed
5655 private int cur; // current position in text
5756 private int prev_cur; // cur before parsing this token (for backToken())
@@ -61,76 +60,90 @@
6261 private String defaultField; // the default field value
6362 private float defaultBoost = 1;
6463 private float defaultAliasBoost = ALIAS_BOOST;
65 - protected enum TokenType {WORD, FIELD, AND, OR, EOF };
66 -
67 - private TokenStream tokenStream;
 64+
 65+ protected enum TokenType {
 66+ WORD, FIELD, AND, OR, EOF
 67+ };
 68+
 69+ private TokenStream tokenStream;
6870 private ArrayList<Token> tokens; // tokens from analysis
6971 protected ParsedWords parsedWords;
7072 protected String[] prefixFilters;
71 - protected enum ExpandedType { WORD, WILDCARD, FUZZY, PHRASE };
 73+
 74+ protected enum ExpandedType {
 75+ WORD, WILDCARD, FUZZY, PHRASE
 76+ };
 77+
7278 protected Term[] highlightTerms = null;
73 -
 79+
7480 protected ArrayList<ArrayList<Term>> urls;
75 -
76 - /** sometimes the fieldsubquery takes the bool modifier, to retrieve it, use this variable,
77 - * this will always point to the last unused bool modifier */
78 - BooleanClause.Occur explicitOccur = null;
79 -
 81+
 82+ /**
 83+ * sometimes the fieldsubquery takes the bool modifier, to retrieve it, use
 84+ * this variable,
 85+ * this will always point to the last unused bool modifier
 86+ */
 87+ BooleanClause.Occur explicitOccur = null;
 88+
8089 /** Wheather to include aliases during title rewrite */
8190 protected boolean disableTitleAliases;
82 -
 91+
8392 /** boost for alias words from analyzer */
84 - public static float ALIAS_BOOST = 0.5f;
 93+ public static float ALIAS_BOOST = 0.5f;
8594 /** boost for title field */
86 - public static float TITLE_BOOST = 6;
 95+ public static float TITLE_BOOST = 6;
8796 public static float TITLE_ALIAS_BOOST = 0.2f;
8897 public static float TITLE_PHRASE_BOOST = 2;
89 - public static float STEM_TITLE_BOOST = 0.8f;
 98+ public static float STEM_TITLE_BOOST = 0.8f;
9099 public static float STEM_TITLE_ALIAS_BOOST = 0.4f;
91100 public static float ALT_TITLE_BOOST = 4;
92101 public static float ALT_TITLE_ALIAS_BOOST = 0.4f;
93102 public static float CONTENTS_BOOST = 0.2f;
94 -
 103+
95104 public static float STEM_WORD_BOOST = 0.01f;
96105 public static float SINGULAR_WORD_BOOST = 0.5f;
97 -
 106+
98107 // main phrase stuff:
99108 public static int MAINPHRASE_SLOP = 100;
100109 public static float MAINPHRASE_BOOST = 2f;
101 - public static float RELEVANCE_RELATED_BOOST = 12f;
 110+ public static float RELEVANCE_RELATED_BOOST = 12f;
102111 public static float RELEVANCE_ALTTITLE_BOOST = 2.5f;
103112 public static float SECTIONS_BOOST = 0.25f;
104113 public static float ALTTITLE_BOOST = 0.5f;
105114 public static float RELATED_BOOST = 1f;
106115 // additional to main phrase:
107116 public static float ADD_RELATED_BOOST = 4f;
108 -
 117+
109118 public static float WILDCARD_BOOST = 2f;
110119 public static float FUZZY_BOOST = 4f;
111 -
 120+
112121 public static boolean ADD_STEM_TITLE = true;
113122 public static boolean ADD_TITLE_PHRASES = true;
114 -
115 - /** Policies in treating field names:
116 - *
 123+
 124+ /**
 125+ * Policies in treating field names:
117126 * LEAVE - don't mess with field rewriting
118127 * IGNORE - convert all field names to contents (except category)
119 - * REWRITE - rewrite (help:searchterm) => (+namespace:12 contents:searchterm)
 128+ * REWRITE - rewrite (help:searchterm) => (+namespace:12
 129+ * contents:searchterm)
120130 */
121 - public enum NamespacePolicy { LEAVE, IGNORE, REWRITE };
 131+ public enum NamespacePolicy {
 132+ LEAVE, IGNORE, REWRITE
 133+ };
 134+
122135 /** Rewritten namespace queries. prefix => query */
123 - static protected Hashtable<String,Query> namespaceQueries = null;
 136+ static protected Hashtable<String, Query> namespaceQueries = null;
124137 /** The 'all' keyword */
125138 static protected String namespaceAllKeyword = null;
126139 /** Prefixes and associated filters. prefix -> filter */
127 - static protected Hashtable<String,NamespaceFilter> namespaceFilters = null;
 140+ static protected Hashtable<String, NamespaceFilter> namespaceFilters = null;
128141 /** nsfilter -> prefix (reverse table to namespaceFilters */
129 - static protected Hashtable<NamespaceFilter,String> namespacePrefixes = null;
 142+ static protected Hashtable<NamespaceFilter, String> namespacePrefixes = null;
130143 private String defaultNamespaceName;
131144 private Query namespaceRewriteQuery;
132145 private NamespacePolicy namespacePolicy;
133146 protected NamespaceFilter defaultNamespaceFilter;
134 - protected static GlobalConfiguration global=null;
 147+ protected static GlobalConfiguration global = null;
135148 protected FieldBuilder.BuilderSet builder;
136149 protected FieldNameFactory fields;
137150 protected FilterFactory filters;
@@ -140,219 +153,246 @@
141154 protected IndexId iid;
142155 protected boolean isInTitle = false;
143156 protected int isInTitleLevel = 0;
144 -
 157+
145158 /** Raw fields to append to queries like ondiscussionpage */
146 - protected HashMap<String,String> rawFields = new HashMap<String,String>();
147 -
148 - Hashtable<String,String> keywordFieldMapping = new Hashtable<String,String>();
149 -
150 - protected Pattern urlPattern = Pattern.compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?");
151 -
 159+ protected HashMap<String, String> rawFields = new HashMap<String, String>();
 160+
 161+ Hashtable<String, String> keywordFieldMapping = new Hashtable<String, String>();
 162+
 163+ protected Pattern urlPattern = Pattern
 164+ .compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?");
 165+
152166 /** default operator (must = AND, should = OR) for boolean queries */
153167 public BooleanClause.Occur boolDefault = BooleanClause.Occur.MUST;
154 -
 168+
155169 /** Word + boost for expanded term */
156170 static class WordBoost {
157171 String word;
158172 float boost;
 173+
159174 public WordBoost(String word, float boost) {
160175 this.word = word;
161176 this.boost = boost;
162177 }
163178 }
164 -
 179+
165180 /** Descriptor for words within queries */
166181 static class WordsDesc {
167182 /** original term text */
168 - String original = null;
 183+ String original = null;
169184 /** words in which the term is expaned to */
170 - ArrayList<WordBoost> expanded = new ArrayList<WordBoost>();
 185+ ArrayList<WordBoost> expanded = new ArrayList<WordBoost>();
171186 ExpandedType type = ExpandedType.WORD;
172187 int position;
173 -
 188+
174189 public WordsDesc(String original, ExpandedType type, int position) {
175190 this.original = original;
176191 this.type = type;
177192 this.position = position;
178193 }
179194
180 - void add(WordBoost wb){
 195+ void add(WordBoost wb) {
181196 expanded.add(wb);
182197 }
183 -
184 - String first(){
 198+
 199+ String first() {
185200 return expanded.get(0).word;
186201 }
187 -
188 - WordBoost firstWordBoost(){
 202+
 203+ WordBoost firstWordBoost() {
189204 return expanded.get(0);
190205 }
 206+
191207 /** new word desc with first word extracted only */
192 - WordsDesc firstWordsDesc(){
193 - WordsDesc d = new WordsDesc(original,type,position);
 208+ WordsDesc firstWordsDesc() {
 209+ WordsDesc d = new WordsDesc(original, type, position);
194210 d.add(firstWordBoost());
195211 return d;
196212 }
197 -
 213+
198214 /** create search terms */
199 - Term[] getTerms(String field){
 215+ Term[] getTerms(String field) {
200216 Term[] terms = new Term[expanded.size()];
201 - for(int i=0;i<expanded.size();i++)
202 - terms[i] = new Term(field,expanded.get(i).word);
 217+ for (int i = 0; i < expanded.size(); i++)
 218+ terms[i] = new Term(field, expanded.get(i).word);
203219 return terms;
204220 }
205 -
206 - ArrayList<Float> getBoosts(){
 221+
 222+ ArrayList<Float> getBoosts() {
207223 ArrayList<Float> boosts = new ArrayList<Float>();
208 - for(WordBoost w : expanded)
 224+ for (WordBoost w : expanded)
209225 boosts.add(w.boost);
210226 return boosts;
211227 }
212 -
213 - int getPosition(){
 228+
 229+ int getPosition() {
214230 return position;
215231 }
216 -
217 - boolean isWildcardOrFuzzy(){
218 - return type == ExpandedType.WILDCARD || type == ExpandedType.FUZZY;
 232+
 233+ boolean isWildcardOrFuzzy() {
 234+ return type == ExpandedType.WILDCARD || type == ExpandedType.FUZZY;
219235 }
220 -
 236+
221237 }
222 -
 238+
223239 /** Words from parser */
224240 static class ParsedWords {
225241 ArrayList<WordsDesc> words = new ArrayList<WordsDesc>();
226 -
227 - void add(String original, ArrayList<String> words, ArrayList<Float> boosts, ExpandedType type){
 242+
 243+ void add(String original, ArrayList<String> words,
 244+ ArrayList<Float> boosts, ExpandedType type) {
228245 int pos = this.words.size();
229 - WordsDesc wd = new WordsDesc(original,type,pos);
230 - for(int i=0;i<words.size();i++){
231 - wd.add(new WordBoost(words.get(i),boosts.get(i)));
 246+ WordsDesc wd = new WordsDesc(original, type, pos);
 247+ for (int i = 0; i < words.size(); i++) {
 248+ wd.add(new WordBoost(words.get(i), boosts.get(i)));
232249 }
233250 this.words.add(wd);
234251 }
235 -
236 - void add(String original, ArrayList<String> words, float boost, ExpandedType type){
 252+
 253+ void add(String original, ArrayList<String> words, float boost,
 254+ ExpandedType type) {
237255 int pos = this.words.size();
238 - WordsDesc wd = new WordsDesc(original,type,pos);
239 - for(int i=0;i<words.size();i++){
240 - wd.add(new WordBoost(words.get(i),boost));
 256+ WordsDesc wd = new WordsDesc(original, type, pos);
 257+ for (int i = 0; i < words.size(); i++) {
 258+ wd.add(new WordBoost(words.get(i), boost));
241259 }
242260 this.words.add(wd);
243261 }
244 -
245 - void add(String original, String word, float boost, ExpandedType type){
 262+
 263+ void add(String original, String word, float boost, ExpandedType type) {
246264 int pos = this.words.size();
247 - WordsDesc wd = new WordsDesc(original,type,pos);
248 - wd.add(new WordBoost(word,boost));
 265+ WordsDesc wd = new WordsDesc(original, type, pos);
 266+ wd.add(new WordBoost(word, boost));
249267 this.words.add(wd);
250268 }
251 -
252 - WordsDesc last(){
253 - return words.get(words.size()-1);
 269+
 270+ WordsDesc last() {
 271+ return words.get(words.size() - 1);
254272 }
255 -
 273+
256274 /** Extract the main stream of words, excludes wildcards and such */
257 - ArrayList<String> extractFirst(){
 275+ ArrayList<String> extractFirst() {
258276 ArrayList<String> ret = new ArrayList<String>();
259 - for(WordsDesc d : words){
260 - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE)
 277+ for (WordsDesc d : words) {
 278+ if (d.type == ExpandedType.WORD
 279+ || d.type == ExpandedType.PHRASE)
261280 ret.add(d.first());
262281 }
263282 return ret;
264283 }
265 -
 284+
266285 /** First string at index of expanded */
267 - String firstAt(int index){
 286+ String firstAt(int index) {
268287 return words.get(index).first();
269288 }
270 -
271 - int size(){
 289+
 290+ int size() {
272291 return words.size();
273292 }
274 -
 293+
275294 /** get ParsedWords with only a single word on given position */
276 - ParsedWords cloneSingleWord(int index){
277 - return cloneRange(index,index);
 295+ ParsedWords cloneSingleWord(int index) {
 296+ return cloneRange(index, index);
278297 }
 298+
279299 /** get ParsedWords with a range of words (both i1, i2 inclusive) */
280 - ParsedWords cloneRange(int i1, int i2){
 300+ ParsedWords cloneRange(int i1, int i2) {
281301 ParsedWords ret = new ParsedWords();
282 - for(int i=i1;i<=i2;i++)
 302+ for (int i = i1; i <= i2; i++)
283303 ret.words.add(words.get(i));
284304 return ret;
285305 }
 306+
286307 /** Get ParsedWords of first words */
287 - ParsedWords cloneFirst(){
 308+ ParsedWords cloneFirst() {
288309 ParsedWords ret = new ParsedWords();
289 - for(WordsDesc d : words){
290 - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE)
 310+ for (WordsDesc d : words) {
 311+ if (d.type == ExpandedType.WORD
 312+ || d.type == ExpandedType.PHRASE)
291313 ret.add(d.firstWordsDesc());
292314 }
293315 return ret;
294316 }
295 -
296 - /** Get ParsedWords of first words, or whole ParsedWords if wildcard/fuzzy */
297 - ParsedWords cloneFirstWithWildcards(){
 317+
 318+ /**
 319+ * Get ParsedWords of first words, or whole ParsedWords if
 320+ * wildcard/fuzzy
 321+ */
 322+ ParsedWords cloneFirstWithWildcards() {
298323 ParsedWords ret = new ParsedWords();
299 - for(WordsDesc d : words){
300 - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE)
 324+ for (WordsDesc d : words) {
 325+ if (d.type == ExpandedType.WORD
 326+ || d.type == ExpandedType.PHRASE)
301327 ret.add(d.firstWordsDesc());
302 - else if(d.isWildcardOrFuzzy())
 328+ else if (d.isWildcardOrFuzzy())
303329 ret.add(d);
304330 }
305331 return ret;
306332 }
307 -
308 - void add(WordsDesc desc){
 333+
 334+ void add(WordsDesc desc) {
309335 words.add(desc);
310336 }
311 -
 337+
312338 }
313 -
 339+
314340 /** Init namespace queries */
315 - protected void initNamespaces(){
316 - if(namespaceQueries != null)
 341+ protected void initNamespaces() {
 342+ if (namespaceQueries != null)
317343 return;
318 - if(global == null)
319 - global = GlobalConfiguration.getInstance();
 344+ if (global == null)
 345+ global = GlobalConfiguration.getInstance();
320346 namespaceAllKeyword = global.getNamespacePrefixAll();
321 - namespaceQueries = new Hashtable<String,Query>();
322 - namespacePrefixes = new Hashtable<NamespaceFilter,String>();
 347+ namespaceQueries = new Hashtable<String, Query>();
 348+ namespacePrefixes = new Hashtable<NamespaceFilter, String>();
323349 namespaceFilters = global.getNamespacePrefixes();
324 - for(Entry<String,NamespaceFilter> prefix : namespaceFilters.entrySet()){
325 - namespaceQueries.put(prefix.getKey(),generateRewrite(prefix.getValue()));
326 - namespacePrefixes.put(prefix.getValue(),prefix.getKey());
 350+ for (Entry<String, NamespaceFilter> prefix : namespaceFilters
 351+ .entrySet()) {
 352+ namespaceQueries.put(prefix.getKey(),
 353+ generateRewrite(prefix.getValue()));
 354+ namespacePrefixes.put(prefix.getValue(), prefix.getKey());
327355 }
328356 }
329 -
 357+
330358 /**
331359 * Construct using default policy (LEAVE), without any namespace rewriting
332 - * @param field default field name
 360+ *
 361+ * @param field
 362+ * default field name
333363 * @param analyzer
334364 */
335 - public WikiQueryParser(String field, Analyzer analyzer, FieldBuilder.BuilderSet builder, Collection<String> stopWords){
336 - this(field,(NamespaceFilter)null,analyzer,builder,NamespacePolicy.LEAVE,stopWords);
 365+ public WikiQueryParser(String field, Analyzer analyzer,
 366+ FieldBuilder.BuilderSet builder, Collection<String> stopWords) {
 367+ this(field, (NamespaceFilter) null, analyzer, builder,
 368+ NamespacePolicy.LEAVE, stopWords);
337369 }
338 -
 370+
339371 /**
340372 * Construct with default field (e.g. contents), with default namespace
341373 * (e.g. main), and with analyzer and namespace policy
 374+ *
342375 * @param field
343376 * @param namespace
344377 * @param analyzer
345378 * @param nsPolicy
346379 */
347 - public WikiQueryParser(String field, String namespace, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy, Collection<String> stopWords){
348 - this(field,new NamespaceFilter(namespace),analyzer,builder,nsPolicy,stopWords);
 380+ public WikiQueryParser(String field, String namespace, Analyzer analyzer,
 381+ FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy,
 382+ Collection<String> stopWords) {
 383+ this(field, new NamespaceFilter(namespace), analyzer, builder,
 384+ nsPolicy, stopWords);
349385 }
350 -
351 - public WikiQueryParser(String field, String namespace, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy){
352 - this(field,new NamespaceFilter(namespace),analyzer,builder,nsPolicy,null);
 386+
 387+ public WikiQueryParser(String field, String namespace, Analyzer analyzer,
 388+ FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy) {
 389+ this(field, new NamespaceFilter(namespace), analyzer, builder,
 390+ nsPolicy, null);
353391 }
354 -
355 - public WikiQueryParser(String field, NamespaceFilter nsfilter, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy, Collection<String> stopWords){
356 - defaultField = field;
 392+
 393+ public WikiQueryParser(String field, NamespaceFilter nsfilter,
 394+ Analyzer analyzer, FieldBuilder.BuilderSet builder,
 395+ NamespacePolicy nsPolicy, Collection<String> stopWords) {
 396+ defaultField = field;
357397 this.analyzer = analyzer;
358398 this.builder = builder;
359399 this.fields = builder.getFields();
@@ -361,299 +401,313 @@
362402 tokens = new ArrayList<Token>();
363403 this.namespacePolicy = nsPolicy;
364404 disableTitleAliases = true;
365 - keywordFieldMapping = new Hashtable<String,String>();
366 - keywordFieldMapping.put("inthread", "ThreadAncestor");
 405+ keywordFieldMapping = new Hashtable<String, String>();
 406+ keywordFieldMapping.put("inthread", "ThreadAncestor");
367407 keywordFieldMapping.put("ondiscussionpage", "ThreadPage");
368408 initNamespaces();
369409 this.stopWords = new HashSet<String>();
370 - if(stopWords != null)
 410+ if (stopWords != null)
371411 this.stopWords.addAll(stopWords);
372 - this.defaultNamespaceFilter=nsfilter;
373 - if(nsfilter != null){
374 - namespaceRewriteQuery = generateRewrite(nsfilter);
375 - if(namespaceRewriteQuery != null && namespacePrefixes.containsKey(nsfilter))
 412+ this.defaultNamespaceFilter = nsfilter;
 413+ if (nsfilter != null) {
 414+ namespaceRewriteQuery = generateRewrite(nsfilter);
 415+ if (namespaceRewriteQuery != null
 416+ && namespacePrefixes.containsKey(nsfilter))
376417 defaultNamespaceName = namespacePrefixes.get(nsfilter);
377418 else
378419 defaultNamespaceName = null;
379 - }
380 - else{
 420+ } else {
381421 namespaceRewriteQuery = null;
382422 defaultNamespaceName = null;
383423 }
384424 }
385 -
 425+
386426 /** Generate a rewrite query for a collection of namespaces */
387 - public static Query generateRewrite(NamespaceFilter nsfilter){
388 - if(nsfilter.cardinality() == 0)
 427+ public static Query generateRewrite(NamespaceFilter nsfilter) {
 428+ if (nsfilter.cardinality() == 0)
389429 return null;
390 - else if(nsfilter.cardinality() == 1)
391 - return new TermQuery(new Term("namespace",Integer.toString(nsfilter.getNamespace())));
392 -
 430+ else if (nsfilter.cardinality() == 1)
 431+ return new TermQuery(new Term("namespace",
 432+ Integer.toString(nsfilter.getNamespace())));
 433+
393434 BooleanQuery bq = new BooleanQuery();
394435 BitSet bs = nsfilter.getIncluded();
395436 // iterate over set bits
396 - for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)){
397 - bq.add(new TermQuery(new Term("namespace",Integer.toString(i))),
 437+ for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
 438+ bq.add(new TermQuery(new Term("namespace", Integer.toString(i))),
398439 BooleanClause.Occur.SHOULD);
399 - bq.add(new TermQuery(new Term("redirect_namespace",Integer.toString(i))),
400 - BooleanClause.Occur.MUST_NOT);
 440+ bq.add(new TermQuery(new Term("redirect_namespace", Integer
 441+ .toString(i))), BooleanClause.Occur.MUST_NOT);
401442 }
402443 return bq;
403444 }
404 -
 445+
405446 /** Generate a rewrite query for a collection of namespaces */
406 - public static Query generateRedirectRewrite(NamespaceFilter nsfilter){
407 - if(nsfilter.cardinality() == 0)
 447+ public static Query generateRedirectRewrite(NamespaceFilter nsfilter) {
 448+ if (nsfilter.cardinality() == 0)
408449 return null;
409 - else if(nsfilter.cardinality() == 1)
410 - return new TermQuery(new Term("redirect_namespace",Integer.toString(nsfilter.getNamespace())));
411 -
 450+ else if (nsfilter.cardinality() == 1)
 451+ return new TermQuery(new Term("redirect_namespace",
 452+ Integer.toString(nsfilter.getNamespace())));
 453+
412454 BooleanQuery bq = new BooleanQuery();
413455 BitSet bs = nsfilter.getIncluded();
414456 // iterate over set bits
415 - for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)){
416 - bq.add(new TermQuery(new Term("redirect_namespace",Integer.toString(i))),
417 - BooleanClause.Occur.SHOULD);
 457+ for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) {
 458+ bq.add(new TermQuery(new Term("redirect_namespace", Integer
 459+ .toString(i))), BooleanClause.Occur.SHOULD);
418460 }
419461 return bq;
420462 }
421 -
422 - /**
 463+
 464+ /**
423465 * Get a hashset of namespace numbers for fields that are
424 - * valid namespace keys.
 466+ * valid namespace keys.
 467+ *
425468 * @param queryText
426469 * @return
427470 */
428 - public HashSet<NamespaceFilter> getFieldNamespaces(String queryText){
 471+ public HashSet<NamespaceFilter> getFieldNamespaces(String queryText) {
429472 HashSet<String> fields = getFields(queryText);
430473 HashSet<NamespaceFilter> ret = new HashSet<NamespaceFilter>();
431474 List ThreadingKeywords = new ArrayList();
432 - ThreadingKeywords.add("inthread");
433 -
434 - for(String field : fields){
 475+ ThreadingKeywords.add("inthread");
 476+
 477+ for (String field : fields) {
435478 field = field.toLowerCase();
436 - if(namespaceFilters.containsKey(field))
 479+ if (namespaceFilters.containsKey(field))
437480 ret.add(namespaceFilters.get(field));
438 - else if(field.equals(namespaceAllKeyword))
 481+ else if (field.equals(namespaceAllKeyword))
439482 ret.add(new NamespaceFilter());
440 - else if(field.equals(defaultField) && defaultNamespaceFilter != null)
 483+ else if (field.equals(defaultField)
 484+ && defaultNamespaceFilter != null)
441485 ret.add(defaultNamespaceFilter);
442 - else if(field.startsWith("[")){
443 - ret.add(new NamespaceFilter(field.substring(1,field.length()-1)));
 486+ else if (field.startsWith("[")) {
 487+ ret.add(new NamespaceFilter(field.substring(1,
 488+ field.length() - 1)));
444489 } else if (ThreadingKeywords.contains(field)) {
445 - ret.add( new NamespaceFilter(90) );
 490+ ret.add(new NamespaceFilter(90));
446491 }
447492 }
448 -
 493+
449494 return ret;
450495 }
451 -
 496+
452497 /** get all fields that appear in a query */
453 - public HashSet<String> getFields(String queryText){
 498+ public HashSet<String> getFields(String queryText) {
454499 int level = 0; // parenthesis count
455500 HashSet<String> fields = new HashSet<String>();
456501 int fieldLevel = -1;
457502 TokenType tokenType;
458503 boolean inPhrase = false;
459 -
 504+
460505 reset();
461 -
462 - queryLength = queryText.length();
 506+
 507+ queryLength = queryText.length();
463508 text = queryText.toCharArray();
464 -
465 - for(cur = 0; cur < text.length; cur++ ){
 509+
 510+ for (cur = 0; cur < text.length; cur++) {
466511 c = text[cur];
467 - if(c == '"'){
 512+ if (c == '"') {
468513 inPhrase = !inPhrase;
469 - if(inPhrase && fieldLevel == -1)
 514+ if (inPhrase && fieldLevel == -1)
470515 fields.add(defaultField);
471516 }
472 -
473 - if(inPhrase)
 517+
 518+ if (inPhrase)
474519 continue; // ignore stuff between ""
475 -
476 - if(c == ')'){
 520+
 521+ if (c == ')') {
477522 level--;
478 - if(level < fieldLevel)
 523+ if (level < fieldLevel)
479524 fieldLevel = -1;
480525 continue;
481 - } else if(c == '('){
482 - level++;
 526+ } else if (c == '(') {
 527+ level++;
483528 continue;
484 - } else if(fieldLevel != -1 && level>fieldLevel)
 529+ } else if (fieldLevel != -1 && level > fieldLevel)
485530 continue;
486 -
487 - if(Character.isLetterOrDigit(c)){
 531+
 532+ if (Character.isLetterOrDigit(c)) {
488533 tokenType = fetchToken();
489 - if(tokenType == TokenType.FIELD){
 534+ if (tokenType == TokenType.FIELD) {
490535 fieldLevel = level;
491 - fields.add(new String(buffer,0,length));
492 - } else if(tokenType == TokenType.WORD){
493 - if(fieldLevel == -1)
 536+ fields.add(new String(buffer, 0, length));
 537+ } else if (tokenType == TokenType.WORD) {
 538+ if (fieldLevel == -1)
494539 fields.add(defaultField);
495540 }
496 - } else if(c == '['){
497 - if(fetchGenericPrefix()){
 541+ } else if (c == '[') {
 542+ if (fetchGenericPrefix()) {
498543 fieldLevel = level;
499 - fields.add(new String(buffer,0,length));
 544+ fields.add(new String(buffer, 0, length));
500545 }
501546 }
502547 }
503 -
504 -
 548+
505549 return fields;
506550 }
507 -
 551+
508552 /** Find and delete all valid prefixes, return search terms in tokens */
509 - public ArrayList<Token> tokenizeForSpellCheck(String queryText){
 553+ public ArrayList<Token> tokenizeForSpellCheck(String queryText) {
510554 int level = 0; // parenthesis count
511555 int fieldLevel = -1;
512556 TokenType tokenType;
513557 boolean inPhrase = false;
514 -
 558+
515559 Analyzer oldAnalyzer = this.analyzer;
516 - this.analyzer = Analyzers.getReusableAnalyzer(filters,new TokenizerOptions.SpellCheckSearch());
517 -
 560+ this.analyzer = Analyzers.getReusableAnalyzer(filters,
 561+ new TokenizerOptions.SpellCheckSearch());
 562+
518563 ArrayList<Token> ret = new ArrayList<Token>();
519 -
 564+
520565 reset();
521 -
522 - queryLength = queryText.length();
 566+
 567+ queryLength = queryText.length();
523568 text = queryText.toCharArray();
524569 String oldDefault = defaultField;
525570 defaultField = "title"; // no stemming
526 -
527 - for(cur = 0; cur < text.length; cur++ ){
 571+
 572+ for (cur = 0; cur < text.length; cur++) {
528573 c = text[cur];
529 - if(c == '"'){
 574+ if (c == '"') {
530575 inPhrase = !inPhrase;
531576 }
532 -
533 - if(inPhrase) // skip words in phrases
534 - continue;
535 - else if(c == ')'){
 577+
 578+ if (inPhrase) // skip words in phrases
 579+ continue;
 580+ else if (c == ')') {
536581 level--;
537 - if(level < fieldLevel)
 582+ if (level < fieldLevel)
538583 fieldLevel = -1;
539584 continue;
540 - } else if(c == '('){
541 - level++;
 585+ } else if (c == '(') {
 586+ level++;
542587 continue;
543 - } else if(fieldLevel != -1 && level>fieldLevel)
 588+ } else if (fieldLevel != -1 && level > fieldLevel)
544589 continue;
545 -
 590+
546591 // include exclusion/inclusion marks
547 - if(isTermChar(c) && text[cur]!='-' && text[cur]!='+'){
 592+ if (isTermChar(c) && text[cur] != '-' && text[cur] != '+') {
548593 int start = cur;
549594 tokenType = fetchToken(inPhrase);
550595 // ignore excluded words
551 - if(tokenType == TokenType.WORD && (start==0 || text[start-1]!='-')){
 596+ if (tokenType == TokenType.WORD
 597+ && (start == 0 || text[start - 1] != '-')) {
552598 String type = "word";
553 - if(bufferIsWildCard())
 599+ if (bufferIsWildCard())
554600 type = "wildcard";
555 - else if(bufferIsFuzzy())
 601+ else if (bufferIsFuzzy())
556602 type = "fuzzy";
557603 analyzeBuffer();
558 - for(Token t : tokens){
559 - if(t.getPositionIncrement() > 0){
560 - ret.add(new Token(t.termText(),start+t.startOffset(),start+t.endOffset(),type));
 604+ for (Token t : tokens) {
 605+ if (t.getPositionIncrement() > 0) {
 606+ ret.add(new Token(t.termText(), start
 607+ + t.startOffset(), start + t.endOffset(),
 608+ type));
561609 }
562 - }
 610+ }
563611 }
564 - } else if(c == '[' && !inPhrase){
 612+ } else if (c == '[' && !inPhrase) {
565613 fetchGenericPrefix();
566614 }
567615 }
568 -
 616+
569617 this.analyzer = oldAnalyzer;
570618 defaultField = oldDefault;
571 -
 619+
572620 return ret;
573 -
 621+
574622 }
575 -
 623+
576624 /** rewrite field name (e.g. help) into a term query like namespace:12 */
577 - private Query getNamespaceQuery(String fieldName){
578 - if(fieldName == null || namespacePolicy != NamespacePolicy.REWRITE)
 625+ private Query getNamespaceQuery(String fieldName) {
 626+ if (fieldName == null || namespacePolicy != NamespacePolicy.REWRITE)
579627 return null;
580 -
 628+
581629 Query q;
582 - if((q = namespaceQueries.get(fieldName))!=null){
 630+ if ((q = namespaceQueries.get(fieldName)) != null) {
583631 return q;
584 - } else if(fieldName.startsWith("[")){
585 - return generateRewrite(new NamespaceFilter(fieldName.substring(1,fieldName.length()-1)));
 632+ } else if (fieldName.startsWith("[")) {
 633+ return generateRewrite(new NamespaceFilter(fieldName.substring(1,
 634+ fieldName.length() - 1)));
586635 } else
587636 return null;
588637 }
589 -
590 - private NamespaceFilter getNamespaceFilter(String fieldName){
591 - if(fieldName == null)
 638+
 639+ private NamespaceFilter getNamespaceFilter(String fieldName) {
 640+ if (fieldName == null)
592641 return defaultNamespaceFilter;
593 - else if(namespaceFilters.contains(fieldName))
 642+ else if (namespaceFilters.contains(fieldName))
594643 return namespaceFilters.get(fieldName);
595 - else if(fieldName.startsWith("["))
596 - return new NamespaceFilter(fieldName.substring(1,fieldName.length()-1));
 644+ else if (fieldName.startsWith("["))
 645+ return new NamespaceFilter(fieldName.substring(1,
 646+ fieldName.length() - 1));
597647 else
598648 return defaultNamespaceFilter;
599649 }
600 -
601 - private final boolean isTermChar(char ch){
602 - return !Character.isWhitespace(ch) && ch != ':' && ch != '(' && ch != ')' && ch !='[' && ch != ']' && ch != ',' && ch != ';' && ch != '"';
 650+
 651+ private final boolean isTermChar(char ch) {
 652+ return !Character.isWhitespace(ch) && ch != ':' && ch != '('
 653+ && ch != ')' && ch != '[' && ch != ']' && ch != ','
 654+ && ch != ';' && ch != '"';
603655 }
604 -
 656+
605657 /**
606 - * Fetch token into <code>buffer</code> starting from current position (<code>cur</code>)
 658+ * Fetch token into <code>buffer</code> starting from current position (
 659+ * <code>cur</code>)
607660 *
608661 * @return type of the token in buffer
609662 */
610 - private TokenType fetchToken(){
 663+ private TokenType fetchToken() {
611664 return fetchToken(false);
612665 }
613 - private TokenType fetchToken(boolean termOnly){
 666+
 667+ private TokenType fetchToken(boolean termOnly) {
614668 char ch;
615669 prev_cur = cur;
616 - for(length = 0; cur < queryLength; cur++){
 670+ for (length = 0; cur < queryLength; cur++) {
617671 ch = text[cur];
618 - if(length == 0 && ch == ' ')
 672+ if (length == 0 && ch == ' ')
619673 continue; // ignore whitespaces
620 -
621 - // pluses and minuses, underscores can be within words (to prevent to be missinterpeted), *,? are for wildcard queries
622 - if(isTermChar(ch)){
623 - if(length<buffer.length)
 674+
 675+ // pluses and minuses, underscores can be within words (to prevent
 676+ // to be missinterpeted), *,? are for wildcard queries
 677+ if (isTermChar(ch)) {
 678+ if (length < buffer.length)
624679 buffer[length++] = ch;
625 - } else{
 680+ } else {
626681 cur--; // position before the nonletter character
627682 break;
628683 }
629684 }
630 - if(length == 0)
 685+ if (length == 0)
631686 return TokenType.EOF;
632 -
633 - if(termOnly)
634 - return TokenType.WORD;
635 -
 687+
 688+ if (termOnly)
 689+ return TokenType.WORD;
 690+
636691 // check for keywords
637 - if(length == 3 && buffer[0]=='A' && buffer[1]=='N' && buffer[2]=='D')
 692+ if (length == 3 && buffer[0] == 'A' && buffer[1] == 'N'
 693+ && buffer[2] == 'D')
638694 return TokenType.AND;
639 - else if(length == 2 && buffer[0]=='O' && buffer[1]=='R')
 695+ else if (length == 2 && buffer[0] == 'O' && buffer[1] == 'R')
640696 return TokenType.OR;
641 -
642 -
 697+
643698 // lookahead to see if this is a field
644 - for(lookup = cur+1; lookup < queryLength; lookup++ ){
 699+ for (lookup = cur + 1; lookup < queryLength; lookup++) {
645700 ch = text[lookup];
646 - if(ch == ' ')
 701+ if (ch == ' ')
647702 continue;
648 - else if(ch == ':'){
 703+ else if (ch == ':') {
649704 // check if it's a valid field
650 - String f = new String(buffer,0,length);
651 -
 705+ String f = new String(buffer, 0, length);
 706+
652707 List<String> fieldOperators = getFieldOperators();
653 -
654 - if( f.equals(namespaceAllKeyword)
655 - || fieldOperators.contains(f)
 708+
 709+ if (f.equals(namespaceAllKeyword) || fieldOperators.contains(f)
656710 || namespaceFilters.containsKey(f)
657 - || namespacePolicy == NamespacePolicy.LEAVE){
 711+ || namespacePolicy == NamespacePolicy.LEAVE) {
658712 cur = lookup;
659713 return TokenType.FIELD;
660714 } else
@@ -661,35 +715,35 @@
662716 } else
663717 break;
664718 }
665 -
666 - return TokenType.WORD;
 719+
 720+ return TokenType.WORD;
667721 }
668 -
 722+
669723 private List<String> getFieldOperators() {
670724 List<String> fieldOperators = new ArrayList<String>();
671725 fieldOperators.add("intitle");
672726 fieldOperators.add("incategory");
673 - fieldOperators.add("inthread");
674 -
 727+ fieldOperators.add("inthread");
 728+
675729 return fieldOperators;
676730 }
677 -
 731+
678732 /**
679 - * Fetches prefixes like [0,1,2] (in [0,1,2]:query)
 733+ * Fetches prefixes like [0,1,2] (in [0,1,2]:query)
680734 *
681735 * @return true if search prefixes is successfully fetched
682736 */
683 - private boolean fetchGenericPrefix(){
 737+ private boolean fetchGenericPrefix() {
684738 char ch;
685739 prev_cur = cur;
686 - if(text[cur] != '[')
 740+ if (text[cur] != '[')
687741 return false; // sanity check
688742 buffer[0] = '[';
689 - for(length = 1, cur++; cur < queryLength; cur++){
 743+ for (length = 1, cur++; cur < queryLength; cur++) {
690744 ch = text[cur];
691 - if(Character.isDigit(ch) || ch ==',')
 745+ if (Character.isDigit(ch) || ch == ',')
692746 buffer[length++] = ch;
693 - else if(ch == ']' && cur+1 < queryLength && text[cur+1]==':'){
 747+ else if (ch == ']' && cur + 1 < queryLength && text[cur + 1] == ':') {
694748 cur++; // position on :
695749 buffer[length++] = ch;
696750 return true;
@@ -698,323 +752,359 @@
699753 }
700754 cur = prev_cur; // traceback
701755 return false;
702 -
 756+
703757 }
704 -
 758+
705759 /** Go back one token */
706 - private void backToken(){
 760+ private void backToken() {
707761 cur = prev_cur;
708762 }
709763
710764 /** analyzer buffer into tokens using default analyzer */
711 - private void analyzeBuffer(){
 765+ private void analyzeBuffer() {
712766 String analysisField = defaultField;
713 - if(defaultField.equals("contents") && isInTitle)
 767+ if (defaultField.equals("contents") && isInTitle)
714768 analysisField = "title";
715 - tokenStream = analyzer.tokenStream(analysisField,
716 - new String(buffer,0,length));
717 -
 769+ tokenStream = analyzer.tokenStream(analysisField, new String(buffer, 0,
 770+ length));
 771+
718772 Token token;
719773 tokens.clear();
720 - try{
721 - while((token = tokenStream.next()) != null){
 774+ try {
 775+ while ((token = tokenStream.next()) != null) {
722776 tokens.add(token);
723777 }
724 - } catch (IOException e){
 778+ } catch (IOException e) {
725779 e.printStackTrace();
726 - }
 780+ }
727781 }
728 -
729 - /** Analyze a string, and return tokens (doesn't use any of the object storage attributes) */
730 - private ArrayList<Token> analyzeString(String input){
 782+
 783+ /**
 784+ * Analyze a string, and return tokens (doesn't use any of the object
 785+ * storage attributes)
 786+ */
 787+ private ArrayList<Token> analyzeString(String input) {
731788 tokenStream = analyzer.tokenStream("contents", input);
732 -
 789+
733790 ArrayList<Token> ret = new ArrayList<Token>();
734791 Token token;
735 - try{
736 - while((token = tokenStream.next()) != null){
 792+ try {
 793+ while ((token = tokenStream.next()) != null) {
737794 ret.add(token);
738795 }
739 - } catch (IOException e){
 796+ } catch (IOException e) {
740797 e.printStackTrace();
741798 }
742799 return ret;
743800 }
744 -
745 -
 801+
746802 /** Make term form lucene token */
747 - private Term makeTerm(Token token){
 803+ private Term makeTerm(Token token) {
748804 return makeTerm(token.termText());
749805 }
750 -
 806+
751807 /** Make term from <code>buffer</code> */
752 - private Term makeTerm(){
753 - return makeTerm(new String(buffer,0,length));
 808+ private Term makeTerm() {
 809+ return makeTerm(new String(buffer, 0, length));
754810 }
755 -
 811+
756812 /** Make a lucene term from string */
757 - private Term makeTerm(String t){
758 -
759 -
760 - if(currentField == null)
761 - return new Term(defaultField,builder.isExactCase()? t : t.toLowerCase());
762 - else if(defaultField.equals("contents") && isInTitle)
763 - return new Term("title",builder.isExactCase()? t : t.toLowerCase());
764 - else if(currentField.equals("incategory")){
765 - String norm = t.replace("_"," "); // bug 10822
766 - return new Term("category",builder.isExactCase()? norm : norm.toLowerCase());
767 - } else if( keywordFieldMapping.containsKey(currentField) ) {
 813+ private Term makeTerm(String t) {
 814+
 815+ if (currentField == null)
 816+ return new Term(defaultField, builder.isExactCase() ? t
 817+ : t.toLowerCase());
 818+ else if (defaultField.equals("contents") && isInTitle)
 819+ return new Term("title", builder.isExactCase() ? t
 820+ : t.toLowerCase());
 821+ else if (currentField.equals("incategory")) {
 822+ String norm = t.replace("_", " "); // bug 10822
 823+ return new Term("category", builder.isExactCase() ? norm
 824+ : norm.toLowerCase());
 825+ } else if (keywordFieldMapping.containsKey(currentField)) {
768826 String field = keywordFieldMapping.get(currentField);
769 -
 827+
770828 return new Term(field, t);
771 - } else if(!"incategory".equals(currentField) &&
772 - (namespacePolicy == NamespacePolicy.IGNORE ||
773 - namespacePolicy == NamespacePolicy.REWRITE))
774 - return new Term(defaultField,t);
 829+ } else if (!"incategory".equals(currentField)
 830+ && (namespacePolicy == NamespacePolicy.IGNORE || namespacePolicy == NamespacePolicy.REWRITE))
 831+ return new Term(defaultField, t);
775832 else
776 - return new Term(currentField,t);
 833+ return new Term(currentField, t);
777834 }
778 -
779 - /**
 835+
 836+ /**
780837 * Parses a phrase query (i.e. between ""), the cur
781 - * should be set to the char just after the first
782 - * quotation mark
783 - *
 838+ * should be set to the char just after the first
 839+ * quotation mark
 840+ *
784841 * @return a query, or null if the query is empty
785842 */
786 - private Query parsePhrase(){
787 - // special case for incategory
788 - if(currentField!=null && currentField.equals("incategory")){
 843+ private Query parsePhrase() {
 844+ // special case for incategory
 845+ if (currentField != null && currentField.equals("incategory")) {
789846 length = 0;
790 - for(; cur < queryLength ; cur++ ){
791 - if(text[cur] == '"')
 847+ for (; cur < queryLength; cur++) {
 848+ if (text[cur] == '"')
792849 break;
793 - else if(length < buffer.length)
 850+ else if (length < buffer.length)
794851 buffer[length++] = text[cur];
795852 }
796 - if(length > 0){
 853+ if (length > 0) {
797854 // no tokenization, we want whole category name
798855 return new TermQuery(makeTerm());
799856 }
800857 return null;
801 - }
802 - //PositionalMultiQuery query = new PositionalMultiQuery(new PositionalOptions.PhraseQueryFallback());
 858+ }
 859+ // PositionalMultiQuery query = new PositionalMultiQuery(new
 860+ // PositionalOptions.PhraseQueryFallback());
803861 MultiPhraseQuery query = new MultiPhraseQuery();
804 - for(; cur < queryLength ; cur++ ){
 862+ for (; cur < queryLength; cur++) {
805863 length = 0;
806864 // fetch next word
807 - while(cur<queryLength && isTermChar(text[cur]) && length<buffer.length){
 865+ while (cur < queryLength && isTermChar(text[cur])
 866+ && length < buffer.length) {
808867 buffer[length++] = text[cur++];
809868 }
810 -
 869+
811870 // add to phrase
812 - if(length > 0){
 871+ if (length > 0) {
813872 boolean added = false;
814 - if(bufferIsWildCard()){
 873+ if (bufferIsWildCard()) {
815874 Term term = makeTerm();
816 - Term[] terms = wildcards.makeTerms(term.text(),term.field());
817 - if(terms != null){
 875+ Term[] terms = wildcards.makeTerms(term.text(),
 876+ term.field());
 877+ if (terms != null) {
818878 query.add(terms);
819 - ArrayList<String> words = wildcards.getWords(term.text());
820 - parsedWords.add(term.text(),words,1f,ExpandedType.WILDCARD);
 879+ ArrayList<String> words = wildcards.getWords(term
 880+ .text());
 881+ parsedWords.add(term.text(), words, 1f,
 882+ ExpandedType.WILDCARD);
821883 added = true;
822884 }
823885 }
824 - if(bufferIsFuzzy()){
 886+ if (bufferIsFuzzy()) {
825887 Term term = makeTerm();
826888 NamespaceFilter nsf = getNamespaceFilter(currentField);
827 - Term[] terms = fuzzy.makeTerms(term.text(),term.field(),nsf);
828 - if(terms != null){
829 - //query.add(terms,fuzzy.getBoosts(term.text(),nsf,terms));
 889+ Term[] terms = fuzzy.makeTerms(term.text(), term.field(),
 890+ nsf);
 891+ if (terms != null) {
 892+ // query.add(terms,fuzzy.getBoosts(term.text(),nsf,terms));
830893 query.add(terms);
831 - ArrayList<String> words = fuzzy.getWords(term.text(),nsf);
832 - parsedWords.add(term.text(),words,fuzzy.getBoosts(term.text(),nsf,words),ExpandedType.FUZZY);
 894+ ArrayList<String> words = fuzzy.getWords(term.text(),
 895+ nsf);
 896+ parsedWords.add(term.text(), words,
 897+ fuzzy.getBoosts(term.text(), nsf, words),
 898+ ExpandedType.FUZZY);
833899 added = true;
834900 }
835901 }
836 - if(!added){
 902+ if (!added) {
837903 // fallback to ordinary words
838904 analyzeBuffer();
839 - for(Token token : tokens){
840 - if(token.getPositionIncrement()>0){ // ignore aliases and stemmed words
 905+ for (Token token : tokens) {
 906+ if (token.getPositionIncrement() > 0) { // ignore
 907+ // aliases and
 908+ // stemmed words
841909 Term t = makeTerm(token);
842 - addToWords(t,1,ExpandedType.PHRASE);
 910+ addToWords(t, 1, ExpandedType.PHRASE);
843911 query.add(t);
844912 }
845 - }
 913+ }
846914 }
847 - }
 915+ }
848916 // end of phrase query
849 - if(cur < queryLength && text[cur] == '"')
 917+ if (cur < queryLength && text[cur] == '"')
850918 break;
851919 }
852 - if(query.getPositions().length > 0){
 920+ if (query.getPositions().length > 0) {
853921 query.setBoost(defaultBoost);
854922 return query;
855923 } else
856924 return null;
857925 }
858 -
859 - final private Query parseClause(int level){
860 - return parseClause(level,false,null);
 926+
 927+ final private Query parseClause(int level) {
 928+ return parseClause(level, false, null);
861929 }
862 -
863 - private final boolean needsRewrite(){
864 - return namespaceRewriteQuery != null && namespacePolicy == NamespacePolicy.REWRITE;
 930+
 931+ private final boolean needsRewrite() {
 932+ return namespaceRewriteQuery != null
 933+ && namespacePolicy == NamespacePolicy.REWRITE;
865934 }
866 -
867 - /** Parses a clause: (in regexp-like notation)
 935+
 936+ /**
 937+ * Parses a clause: (in regexp-like notation)
 938+ * Clause := ([+-]? (<field>:)? <term> | [AND,OR] | \( Clause \) )+
868939 *
869 - * Clause := ([+-]? (<field>:)? <term> | [AND,OR] | \( Clause \) )+
870 - *
871 - * @param level - level of recurstion
872 - * @param returnOnFieldDef - if this is a nested field rewrite call
 940+ * @param level
 941+ * - level of recurstion
 942+ * @param returnOnFieldDef
 943+ * - if this is a nested field rewrite call
873944 * @return
874945 */
875 - private Query parseClause(int level, boolean returnOnFieldDef, String topFieldName){
 946+ private Query parseClause(int level, boolean returnOnFieldDef,
 947+ String topFieldName) {
876948 // the whole query
877 - Query query = null;
 949+ Query query = null;
878950 // reference to boolean query if one is constructed
879951 BooleanQuery boolquery = null;
880952 BooleanClause.Occur occur = boolDefault;
881953 // the first query
882 - BooleanClause.Occur firstOccur = boolDefault;
 954+ BooleanClause.Occur firstOccur = boolDefault;
883955 // state
884956 TokenType tokenType;
885 - Query subquery = null;
 957+ Query subquery = null;
886958 boolean definedField = false;
887959 boolean definedExplicitField = false;
888960 Query fieldQuery = null; // the namespace term, e.g. namespace:0
889 - Query fieldsubquery = null; // e.g. 'all:something else' will be parsed 'something else'
890 -
 961+ Query fieldsubquery = null; // e.g. 'all:something else' will be parsed
 962+ // 'something else'
 963+
891964 // assume default namespace value on rewrite
892 - if(!returnOnFieldDef && currentField == null && needsRewrite()){
893 - fieldQuery = namespaceRewriteQuery;
 965+ if (!returnOnFieldDef && currentField == null && needsRewrite()) {
 966+ fieldQuery = namespaceRewriteQuery;
894967 }
895 -
896 - mainloop: for( ; cur < queryLength; cur++ ){
 968+
 969+ mainloop: for (; cur < queryLength; cur++) {
897970 c = text[cur];
898 -
899 - if(c == ' ')
 971+
 972+ if (c == ' ')
900973 continue;
901 -
 974+
902975 // terms, fields
903 - if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*'){
 976+ if (Character.isLetterOrDigit(c) || c == '.' || c == '['
 977+ || c == '*') {
904978 // check for generic namespace prefixes, e.g. [0,1]:
905 - if(c == '['){
906 - if(fetchGenericPrefix())
 979+ if (c == '[') {
 980+ if (fetchGenericPrefix())
907981 tokenType = TokenType.FIELD;
908982 else
909983 continue;
910 - } else // fetch next token
 984+ } else
 985+ // fetch next token
911986 tokenType = fetchToken();
912 -
913 - switch(tokenType){
 987+
 988+ switch (tokenType) {
914989 case FIELD:
915990 // this is where the function returns if called from the
916991 // next if (i.e. some 10 lines down)
917 - if(returnOnFieldDef){
918 - String newfield = new String(buffer,0,length);
919 - if(!newfield.equals("incategory") && !newfield.equals(topFieldName)){
920 - backToken(); cur--;
 992+ if (returnOnFieldDef) {
 993+ String newfield = new String(buffer, 0, length);
 994+ if (!newfield.equals("incategory")
 995+ && !newfield.equals(topFieldName)) {
 996+ backToken();
 997+ cur--;
921998 break mainloop;
922999 }
9231000 }
924 - if(currentField == null || definedExplicitField){
 1001+ if (currentField == null || definedExplicitField) {
9251002 // set field name
926 - currentField = new String(buffer,0,length);
927 - if("intitle".equals(currentField)){
 1003+ currentField = new String(buffer, 0, length);
 1004+ if ("intitle".equals(currentField)) {
9281005 isInTitle = true;
9291006 isInTitleLevel = level;
9301007 }
931 - if((defaultNamespaceName!=null && currentField.equals(defaultNamespaceName)) || currentField.equals(defaultField)){
 1008+ if ((defaultNamespaceName != null && currentField
 1009+ .equals(defaultNamespaceName))
 1010+ || currentField.equals(defaultField)) {
9321011 currentField = null;
9331012 break; // repeated definition of field, ignore
9341013 }
9351014 definedExplicitField = true;
936 -
937 - fieldQuery = getNamespaceQuery(currentField); // depending on policy rewrite this field
938 - if(fieldQuery != null){
939 - // save field, we will need it to be set to null to fetch categories
 1015+
 1016+ fieldQuery = getNamespaceQuery(currentField); // depending
 1017+ // on
 1018+ // policy
 1019+ // rewrite
 1020+ // this
 1021+ // field
 1022+ if (fieldQuery != null) {
 1023+ // save field, we will need it to be set to null to
 1024+ // fetch categories
9401025 String myfield = currentField;
9411026 currentField = null;
9421027 // fetch the clause until the next field
943 - fieldsubquery = parseClause(level+1,true,myfield);
 1028+ fieldsubquery = parseClause(level + 1, true,
 1029+ myfield);
9441030 currentField = myfield;
9451031 }
946 - } else{
 1032+ } else {
9471033 // nested field names, don't allow, just add to query
9481034 analyzeBuffer();
9491035 subquery = makeQueryFromTokens(occur);
9501036 }
9511037 break;
9521038 case WORD:
953 - if(fieldQuery != null){
 1039+ if (fieldQuery != null) {
9541040 backToken();
955 - String myfield = (topFieldName != null)? topFieldName : (currentField !=null)? currentField : (defaultNamespaceName!=null)? defaultNamespaceName : defaultField;
956 - fieldsubquery = parseClause(level+1,true,myfield);
957 - } else{
 1041+ String myfield = (topFieldName != null) ? topFieldName
 1042+ : (currentField != null) ? currentField
 1043+ : (defaultNamespaceName != null) ? defaultNamespaceName
 1044+ : defaultField;
 1045+ fieldsubquery = parseClause(level + 1, true, myfield);
 1046+ } else {
9581047 analyzeBuffer();
959 - subquery = makeQueryFromTokens(explicitOccur!=null? explicitOccur : occur);
 1048+ subquery = makeQueryFromTokens(explicitOccur != null ? explicitOccur
 1049+ : occur);
9601050 }
9611051 break;
9621052 case AND:
9631053 firstOccur = BooleanClause.Occur.MUST;
9641054 occur = BooleanClause.Occur.MUST;
965 - if(returnOnFieldDef)
 1055+ if (returnOnFieldDef)
9661056 explicitOccur = BooleanClause.Occur.MUST;
9671057 continue;
9681058 case OR:
9691059 firstOccur = BooleanClause.Occur.SHOULD;
9701060 occur = BooleanClause.Occur.SHOULD;
971 - if(returnOnFieldDef)
 1061+ if (returnOnFieldDef)
9721062 explicitOccur = BooleanClause.Occur.SHOULD;
9731063 continue;
9741064 case EOF:
975 - break mainloop;
976 - }
 1065+ break mainloop;
 1066+ }
9771067 }
978 -
 1068+
9791069 // field subquery, the fetched clause while doing rewriting
980 - if(fieldsubquery != null){
 1070+ if (fieldsubquery != null) {
9811071 // this not the first field definition at this level
982 - if(definedField){
 1072+ if (definedField) {
9831073 // embed the old query
9841074 BooleanQuery bq = new BooleanQuery();
985 - bq.add(query,BooleanClause.Occur.SHOULD);
 1075+ bq.add(query, BooleanClause.Occur.SHOULD);
9861076 query = boolquery = bq;
9871077 }
988 -
 1078+
9891079 BooleanQuery bq = new BooleanQuery();
990 - bq.add(fieldQuery,BooleanClause.Occur.MUST);
991 - bq.add(fieldsubquery,BooleanClause.Occur.MUST);
992 -
 1080+ bq.add(fieldQuery, BooleanClause.Occur.MUST);
 1081+ bq.add(fieldsubquery, BooleanClause.Occur.MUST);
 1082+
9931083 // add to existing queries
994 - if(boolquery != null)
995 - boolquery.add(bq,BooleanClause.Occur.SHOULD);
996 - else if(query != null){
 1084+ if (boolquery != null)
 1085+ boolquery.add(bq, BooleanClause.Occur.SHOULD);
 1086+ else if (query != null) {
9971087 boolquery = new BooleanQuery();
998 - boolquery.add(query,firstOccur);
999 - boolquery.add(bq,BooleanClause.Occur.SHOULD);
 1088+ boolquery.add(query, firstOccur);
 1089+ boolquery.add(bq, BooleanClause.Occur.SHOULD);
10001090 query = boolquery;
10011091 } else
10021092 query = bq;
1003 -
 1093+
10041094 fieldQuery = null;
10051095 definedField = true;
10061096 fieldsubquery = null;
10071097 }
1008 -
 1098+
10091099 // modifiers
1010 - switch(c){
 1100+ switch (c) {
10111101 case '+':
10121102 occur = BooleanClause.Occur.MUST;
1013 - if(returnOnFieldDef)
1014 - explicitOccur = BooleanClause.Occur.MUST;
 1103+ if (returnOnFieldDef)
 1104+ explicitOccur = BooleanClause.Occur.MUST;
10151105 continue;
10161106 case '-':
10171107 occur = BooleanClause.Occur.MUST_NOT;
1018 - if(returnOnFieldDef)
 1108+ if (returnOnFieldDef)
10191109 explicitOccur = BooleanClause.Occur.MUST_NOT;
10201110 continue;
10211111 case '"':
@@ -1023,12 +1113,12 @@
10241114 break;
10251115 case '(':
10261116 cur++;
1027 - subquery = parseClause(level+1);
 1117+ subquery = parseClause(level + 1);
10281118 break;
10291119 case ')':
1030 - if(level > 0){
 1120+ if (level > 0) {
10311121 // get out of titles on appropriate level of parenthesis
1032 - if(isInTitle && level <= isInTitleLevel)
 1122+ if (isInTitle && level <= isInTitleLevel)
10331123 isInTitle = false;
10341124 break mainloop;
10351125 }
@@ -1036,23 +1126,22 @@
10371127 }
10381128
10391129 // if we fetched some tokens or a subquery add it to main query
1040 - if(subquery != null){
1041 - if(query == null){
 1130+ if (subquery != null) {
 1131+ if (query == null) {
10421132 query = subquery;
10431133 firstOccur = occur; // save the boolean modifier
10441134 occur = boolDefault; // return to default
1045 - }
1046 - else{
1047 - if(explicitOccur != null)
 1135+ } else {
 1136+ if (explicitOccur != null)
10481137 occur = explicitOccur;
1049 - if(boolquery == null){
 1138+ if (boolquery == null) {
10501139 // we have found the second term, make boolean query
10511140 boolquery = new BooleanQuery();
1052 - boolquery.add(query,firstOccur);
1053 - boolquery.add(subquery,occur);
 1141+ boolquery.add(query, firstOccur);
 1142+ boolquery.add(subquery, occur);
10541143 query = boolquery;
1055 - } else{
1056 - boolquery.add(subquery,occur);
 1144+ } else {
 1145+ boolquery.add(subquery, occur);
10571146 }
10581147 occur = boolDefault; // return to default
10591148 explicitOccur = null;
@@ -1060,193 +1149,199 @@
10611150 subquery = null;
10621151 }
10631152 }
1064 -
1065 - if(definedExplicitField)
 1153+
 1154+ if (definedExplicitField)
10661155 currentField = null;
10671156 return query;
10681157 }
1069 -
1070 - /**
 1158+
 1159+ /**
10711160 * return true if buffer is wildcard
1072 - * the only allowed patterns are *q and q* and not other combinations like *q* or q*r
1073 - *
 1161+ * the only allowed patterns are *q and q* and not other combinations like
 1162+ * *q* or q*r
10741163 */
1075 - private boolean bufferIsWildCard(){
1076 - if(length < 2)
 1164+ private boolean bufferIsWildCard() {
 1165+ if (length < 2)
10771166 return false;
10781167 boolean wild = false;
10791168 int index = -1;
10801169 // only allow '*' at begin and end
1081 - if(buffer[0] == '*'){
 1170+ if (buffer[0] == '*') {
10821171 index = 0;
10831172 wild = true;
1084 - } else if( buffer[length-1] == '*' ){
1085 - index = length-1;
 1173+ } else if (buffer[length - 1] == '*') {
 1174+ index = length - 1;
10861175 wild = true;
10871176 }
10881177
10891178 // check if it's a valid wildcard
1090 - if(wild){
 1179+ if (wild) {
10911180 // check if this is the only asterix
1092 - for(int i=0;i<length;i++){
1093 - if( i!= index && buffer[i] == '*'){
 1181+ for (int i = 0; i < length; i++) {
 1182+ if (i != index && buffer[i] == '*') {
10941183 return false; // more than one '*'
10951184 }
10961185 }
1097 -
 1186+
10981187 // require at least one letter besides the wildcard sign
1099 - for(int i=0;i<length;i++){
1100 - if(Character.isLetterOrDigit(buffer[i]))
 1188+ for (int i = 0; i < length; i++) {
 1189+ if (Character.isLetterOrDigit(buffer[i]))
11011190 return true; // found it!
11021191 }
11031192 }
11041193 return false;
11051194 }
1106 -
1107 - private boolean bufferIsFuzzy(){
1108 - return length>1 && (buffer[0]=='~' || buffer[length-1]=='~');
 1195+
 1196+ private boolean bufferIsFuzzy() {
 1197+ return length > 1 && (buffer[0] == '~' || buffer[length - 1] == '~');
11091198 }
1110 -
1111 - private boolean bufferContains(char c){
1112 - for(int i=0;i<length;i++){
1113 - if(buffer[i] == c)
 1199+
 1200+ private boolean bufferContains(char c) {
 1201+ for (int i = 0; i < length; i++) {
 1202+ if (buffer[i] == c)
11141203 return true;
11151204 }
11161205 return false;
11171206 }
1118 -
1119 - private void addToWords(Term t){
1120 - addToWords(t,1,ExpandedType.WORD);
 1207+
 1208+ private void addToWords(Term t) {
 1209+ addToWords(t, 1, ExpandedType.WORD);
11211210 }
1122 - private void addToWords(Term t, float boost, ExpandedType type){
1123 - parsedWords.add(t.text(),t.text(),boost,type);
 1211+
 1212+ private void addToWords(Term t, float boost, ExpandedType type) {
 1213+ parsedWords.add(t.text(), t.text(), boost, type);
11241214 }
1125 -
1126 - private void addToWordsAsAlias(Token t){
 1215+
 1216+ private void addToWordsAsAlias(Token t) {
11271217 float boost = STEM_WORD_BOOST;
1128 - if(t.type().equals("singular"))
 1218+ if (t.type().equals("singular"))
11291219 boost = SINGULAR_WORD_BOOST;
1130 - parsedWords.last().add(new WordBoost(t.termText(),boost));
 1220+ parsedWords.last().add(new WordBoost(t.termText(), boost));
11311221 }
1132 -
1133 - /**
 1222+
 1223+ /**
11341224 * Constructs either a termquery or a boolean query depending on
11351225 * analysis of the fetched token. A single "word" might be analyzed
1136 - * into many tokens, and some of them might be aliases
 1226+ * into many tokens, and some of them might be aliases
 1227+ *
11371228 * @return
11381229 */
1139 - private Query makeQueryFromTokens(BooleanClause.Occur toplevelOccur){
 1230+ private Query makeQueryFromTokens(BooleanClause.Occur toplevelOccur) {
11401231 BooleanQuery bq = null;
11411232 TermQuery t;
11421233 boolean addAliases = true;
1143 -
 1234+
11441235 // check for urls
1145 - Matcher urlMatcher = urlPattern.matcher(new String(buffer,0,length));
1146 - while(bufferContains('.') && urlMatcher.find()){
 1236+ Matcher urlMatcher = urlPattern.matcher(new String(buffer, 0, length));
 1237+ while (bufferContains('.') && urlMatcher.find()) {
11471238 ArrayList<Token> urlTokens = analyzeString(urlMatcher.group());
11481239 ArrayList<Term> urlTerms = new ArrayList<Term>();
1149 - for(Token tt : urlTokens)
 1240+ for (Token tt : urlTokens)
11501241 urlTerms.add(makeTerm(tt.termText()));
1151 - urls.add(urlTerms);
 1242+ urls.add(urlTerms);
11521243 }
1153 -
 1244+
11541245 // categories should not be analyzed
1155 - if(currentField != null && currentField.equals("incategory")){
 1246+ if (currentField != null && currentField.equals("incategory")) {
11561247 return new TermQuery(makeTerm());
11571248 }
1158 -
1159 - // check for wildcard seaches, they are also not analyzed/stemmed, only for titles
1160 - // wildcard signs are allowed only at the end of the word, minimum one letter word
1161 - if(length>1 && wildcards != null && bufferIsWildCard()){
1162 - Term term = makeTerm();
1163 - Query ret = wildcards.makeQuery(term.text(),term.field());
1164 - if(ret != null){
 1249+
 1250+ // check for wildcard searches, they are also not analyzed/stemmed,
 1251+ // only for titles
 1252+ // a wildcard sign is allowed only at the beginning or end of the word,
 1253+ // and at least one letter or digit is required besides it
 1254+ if (length > 1 && wildcards != null && bufferIsWildCard()) {
 1255+ Term term = makeTerm();
 1256+ Query ret = wildcards.makeQuery(term.text(), term.field());
 1257+ if (ret != null) {
11651258 ArrayList<String> words = wildcards.getWords(term.text());
1166 - parsedWords.add(term.text(),words,1,ExpandedType.WILDCARD);
 1259+ parsedWords.add(term.text(), words, 1, ExpandedType.WILDCARD);
11671260 ret.setBoost(WILDCARD_BOOST);
11681261 return ret;
1169 - } else{
 1262+ } else {
11701263 // something is wrong, try making normal query
11711264 addToWords(term);
11721265 return new TermQuery(term);
11731266 }
11741267 }
11751268 // parse fuzzy queries
1176 - if(length>1 && fuzzy != null && bufferIsFuzzy()){
 1269+ if (length > 1 && fuzzy != null && bufferIsFuzzy()) {
11771270 Term term = makeTerm();
1178 - String termText = term.text().replaceAll("~","");
 1271+ String termText = term.text().replaceAll("~", "");
11791272 NamespaceFilter nsf = getNamespaceFilter(currentField);
1180 - Query ret = fuzzy.makeQuery(termText,term.field(),nsf);
1181 - if(ret != null){
1182 - ArrayList<String> words = fuzzy.getWords(termText,nsf);
1183 - parsedWords.add(term.text(),words,fuzzy.getBoosts(termText,nsf,words),ExpandedType.FUZZY);
 1273+ Query ret = fuzzy.makeQuery(termText, term.field(), nsf);
 1274+ if (ret != null) {
 1275+ ArrayList<String> words = fuzzy.getWords(termText, nsf);
 1276+ parsedWords.add(term.text(), words,
 1277+ fuzzy.getBoosts(termText, nsf, words),
 1278+ ExpandedType.FUZZY);
11841279 ret.setBoost(FUZZY_BOOST);
11851280 return ret;
11861281 }
11871282 }
1188 -
1189 - if(toplevelOccur == BooleanClause.Occur.MUST_NOT)
 1283+
 1284+ if (toplevelOccur == BooleanClause.Occur.MUST_NOT)
11901285 addAliases = false;
11911286
1192 - if(tokens.size() == 1){
 1287+ if (tokens.size() == 1) {
11931288 t = new TermQuery(makeTerm(tokens.get(0)));
11941289 t.setBoost(defaultBoost);
1195 - if(toplevelOccur != Occur.MUST_NOT)
 1290+ if (toplevelOccur != Occur.MUST_NOT)
11961291 addToWords(t.getTerm());
11971292 return t;
1198 - } else{
 1293+ } else {
11991294 // make a nested boolean query
12001295 ArrayList<BooleanQuery> queries = new ArrayList<BooleanQuery>();
12011296 ArrayList<Token> aliases = new ArrayList<Token>();
1202 - for(int i=0; i<tokens.size(); i++){
 1297+ for (int i = 0; i < tokens.size(); i++) {
12031298 BooleanQuery query = new BooleanQuery();
12041299 // main token
12051300 Token token = tokens.get(i);
12061301 t = new TermQuery(makeTerm(token));
12071302 t.setBoost(defaultBoost);
1208 - if(toplevelOccur != Occur.MUST_NOT)
 1303+ if (toplevelOccur != Occur.MUST_NOT)
12091304 addToWords(t.getTerm());
1210 - query.add(t,Occur.SHOULD);
 1305+ query.add(t, Occur.SHOULD);
12111306 // group aliases together
12121307 aliases.clear();
1213 - for(int j=i+1;j<tokens.size();j++){
1214 - if(tokens.get(j).getPositionIncrement() == 0){
 1308+ for (int j = i + 1; j < tokens.size(); j++) {
 1309+ if (tokens.get(j).getPositionIncrement() == 0) {
12151310 aliases.add(tokens.get(j));
12161311 i = j;
12171312 } else
12181313 break;
1219 - }
1220 - if(addAliases){
1221 - for(Token alias : aliases){
 1314+ }
 1315+ if (addAliases) {
 1316+ for (Token alias : aliases) {
12221317 t = new TermQuery(makeTerm(alias));
1223 - t.setBoost(defaultAliasBoost*defaultBoost);
1224 - query.add(t,Occur.SHOULD);
 1318+ t.setBoost(defaultAliasBoost * defaultBoost);
 1319+ query.add(t, Occur.SHOULD);
12251320 addToWordsAsAlias(alias);
12261321 }
12271322 }
12281323 queries.add(query);
12291324 }
12301325 // don't return a nested query if there is only one
1231 - if(queries.size() == 1){
1232 - BooleanQuery q = (BooleanQuery)queries.get(0);
 1326+ if (queries.size() == 1) {
 1327+ BooleanQuery q = (BooleanQuery) queries.get(0);
12331328 // one nested clause
1234 - if(q.getClauses().length == 1)
 1329+ if (q.getClauses().length == 1)
12351330 return q.getClauses()[0].getQuery();
12361331 return queries.get(0);
12371332 }
12381333 // multiple tokens, e.g. super-hero -> +super +hero
12391334 bq = new BooleanQuery();
1240 - for(BooleanQuery q : queries){
1241 - if(q.getClauses().length == 1)
1242 - bq.add(q.getClauses()[0].getQuery(),boolDefault);
 1335+ for (BooleanQuery q : queries) {
 1336+ if (q.getClauses().length == 1)
 1337+ bq.add(q.getClauses()[0].getQuery(), boolDefault);
12431338 else
1244 - bq.add(q,boolDefault);
 1339+ bq.add(q, boolDefault);
12451340 }
12461341 return bq;
1247 -
 1342+
12481343 }
12491344 }
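For a word that the analyzer splits into several tokens (the "super-hero" example in the comment above), each token ends up as its own clause of a surrounding BooleanQuery. A rough standalone illustration against the pre-4.0 Lucene API this file compiles against; the "contents" field name and the MUST occurrence are assumptions chosen to match the "+super +hero" comment, while the real occurrence comes from boolDefault:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class MultiTokenQuerySketch {
    public static void main(String[] args) {
        // "super-hero" analyzed into two tokens; each token becomes a clause,
        // combined with the default occurrence, i.e. +super +hero.
        BooleanQuery bq = new BooleanQuery();
        bq.add(new TermQuery(new Term("contents", "super")), Occur.MUST);
        bq.add(new TermQuery(new Term("contents", "hero")), Occur.MUST);
        System.out.println(bq); // prints something like: +contents:super +contents:hero
    }
}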
1250 -
 1345+
12511346 /**
12521347 * Extract prefix: field from the query and put it into prefixFilter
12531348 * variable for later retrieval
@@ -1254,94 +1349,101 @@
12551350 * @param queryText
12561351 * @return queryText with prefix part deleted
12571352 */
1258 - public String extractPrefixFilter(String queryText){
 1353+ public String extractPrefixFilter(String queryText) {
12591354 this.prefixFilters = null;
1260 - ArrayList<String> filters = new ArrayList<String>();
 1355+ ArrayList<String> filters = new ArrayList<String>();
12611356 int start = 0;
1262 - while(start < queryText.length()){
1263 - int end = indexOf(queryText,'"',start); // begin of phrase
1264 - int inx = queryText.indexOf("prefix:");
1265 - if(inx >=0 && inx < end){
1266 - String[] prefixes = queryText.substring(inx+"prefix:".length()).split("\\|");
 1357+ while (start < queryText.length()) {
 1358+ int end = indexOf(queryText, '"', start); // beginning of phrase
 1359+ int inx = queryText.indexOf("prefix:");
 1360+ if (inx >= 0 && inx < end) {
 1361+ String[] prefixes = queryText.substring(
 1362+ inx + "prefix:".length()).split("\\|");
12671363
1268 - for(String prefix : prefixes){
 1364+ for (String prefix : prefixes) {
12691365 String full = null;
1270 - if(prefix.startsWith("[") && prefix.contains("]:")){
 1366+ if (prefix.startsWith("[") && prefix.contains("]:")) {
12711367 // convert from [2]:query to 2:query form
1272 - full = prefix.replace("[","").replace("]:",":");
1273 - } else // default to main namespace
1274 - full = "0:"+prefix ;
1275 -
 1368+ full = prefix.replace("[", "").replace("]:", ":");
 1369+ } else
 1370+ // default to main namespace
 1371+ full = "0:" + prefix;
 1372+
12761373 // add lowercase nonempty prefixes
1277 - if(full != null && full.length()>0)
 1374+ if (full != null && full.length() > 0)
12781375 filters.add(full.toLowerCase());
1279 -
 1376+
12801377 }
1281 - this.prefixFilters = filters.toArray(new String[]{});
 1378+ this.prefixFilters = filters.toArray(new String[] {});
12821379 // return the actual query without prefix
1283 - return queryText.substring(0,inx);
 1380+ return queryText.substring(0, inx);
12841381 }
1285 - start = end+1;
1286 - if(start < queryText.length()){
 1382+ start = end + 1;
 1383+ if (start < queryText.length()) {
12871384 // skip phrase
1288 - start = indexOf(queryText,'"',start) + 1;
 1385+ start = indexOf(queryText, '"', start) + 1;
12891386 }
12901387 }
1291 -
 1388+
12921389 return queryText;
12931390 }
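The normalization applied to the text after "prefix:" (split on '|', rewrite a leading "[ns]:" to "ns:", default everything else to namespace 0, lowercase) can be sketched on its own; the quoted-phrase skipping done by the loop above is left out, and the class and method names are illustrative:

import java.util.ArrayList;

public class PrefixFilterSketch {
    // Normalize the text that follows "prefix:", e.g. "[2]:Foo|Bar".
    static ArrayList<String> normalizePrefixes(String afterPrefixKeyword) {
        ArrayList<String> filters = new ArrayList<String>();
        for (String prefix : afterPrefixKeyword.split("\\|")) {
            String full;
            if (prefix.startsWith("[") && prefix.contains("]:"))
                full = prefix.replace("[", "").replace("]:", ":"); // [2]:foo -> 2:foo
            else
                full = "0:" + prefix; // default to the main namespace
            if (full.length() > 0)
                filters.add(full.toLowerCase());
        }
        return filters;
    }

    public static void main(String[] args) {
        System.out.println(normalizePrefixes("[2]:Foo|Bar")); // prints: [2:foo, 0:bar]
    }
}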
1294 -
 1391+
12951392 /**
12961393 * Extract prefix: field from the query and put it into prefixFilter
12971394 * variable for later retrieval
12981395 *
12991396 * @param queryText
1300 - * @param field (like "ondiscussionthread:")
 1397+ * @param field
 1398+ * (like "ondiscussionthread:")
13011399 * @return [0] - queryText with field part deleted
13021400 * [1] - the field part
13031401 */
1304 - public static String[] extractRawField(String queryText, String field){
1305 - ArrayList<String> filters = new ArrayList<String>();
 1402+ public static String[] extractRawField(String queryText, String field) {
 1403+ ArrayList<String> filters = new ArrayList<String>();
13061404 int start = 0;
1307 - while(start < queryText.length()){
1308 - int end = indexOf(queryText,'"',start); // begin of phrase
1309 - int inx = queryText.indexOf(field);
1310 - if(inx >=0 && inx < end){
1311 - String prefix = queryText.substring(inx+field.length());
 1405+ while (start < queryText.length()) {
 1406+ int end = indexOf(queryText, '"', start); // beginning of phrase
 1407+ int inx = queryText.indexOf(field);
 1408+ if (inx >= 0 && inx < end) {
 1409+ String prefix = queryText.substring(inx + field.length());
13121410
13131411 String full = null;
1314 - if(prefix.startsWith("[") && prefix.contains("]:")){
 1412+ if (prefix.startsWith("[") && prefix.contains("]:")) {
13151413 // convert from [2]:query to 2:query form
1316 - full = prefix.replace("[","").replace("]:",":");
1317 - } else // default to main namespace
1318 - full = "0:"+prefix ;
1319 -
 1414+ full = prefix.replace("[", "").replace("]:", ":");
 1415+ } else
 1416+ // default to main namespace
 1417+ full = "0:" + prefix;
 1418+
13201419 // add nonempty prefixes
1321 - if(full != null && full.length()>0)
 1420+ if (full != null && full.length() > 0)
13221421 filters.add(full);
1323 -
1324 - return new String[]{ queryText.substring(0,inx), full };
1325 -
 1422+
 1423+ return new String[] { queryText.substring(0, inx), full };
 1424+
13261425 }
1327 - start = end+1;
1328 - if(start < queryText.length()){
 1426+ start = end + 1;
 1427+ if (start < queryText.length()) {
13291428 // skip phrase
1330 - start = indexOf(queryText,'"',start) + 1;
 1429+ start = indexOf(queryText, '"', start) + 1;
13311430 }
13321431 }
1333 -
1334 - return new String[]{ queryText, null };
 1432+
 1433+ return new String[] { queryText, null };
13351434 }
1336 -
1337 - /** Like string.indexOf but return end of string instead of -1 when needle is not found */
1338 - protected static int indexOf(String string, char needle, int start){
1339 - int inx = string.indexOf(needle,start);
1340 - if(inx == -1)
 1435+
 1436+ /**
 1437+ * Like string.indexOf but return end of string instead of -1 when needle is
 1438+ * not found
 1439+ */
 1440+ protected static int indexOf(String string, char needle, int start) {
 1441+ int inx = string.indexOf(needle, start);
 1442+ if (inx == -1)
13411443 return string.length();
13421444 else
13431445 return inx;
13441446 }
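The helper differs from String.indexOf only in its not-found value, which lets the callers above treat "no further quote" as "scan to the end of the query text". A minimal sketch:

public class IndexOfSketch {
    static int indexOfOrEnd(String s, char needle, int start) {
        int inx = s.indexOf(needle, start);
        return inx == -1 ? s.length() : inx;
    }

    public static void main(String[] args) {
        String q = "some query without quotes";
        System.out.println(indexOfOrEnd(q, '"', 0)); // 25 (the string length), not -1
    }
}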
1345 -
 1447+
13461448 public boolean isDisableTitleAliases() {
13471449 return disableTitleAliases;
13481450 }
@@ -1351,183 +1453,207 @@
13521454 }
13531455
13541456 /** Reset the parser state */
1355 - private void reset(){
1356 - cur = 0;
 1457+ private void reset() {
 1458+ cur = 0;
13571459 length = 0;
1358 - currentField = null;
 1460+ currentField = null;
13591461 prev_cur = 0;
13601462 explicitOccur = null;
13611463 parsedWords = new ParsedWords();
13621464 urls = new ArrayList<ArrayList<Term>>();
13631465 isInTitle = false;
13641466 }
1365 -
1366 - /** Init parsing, call this function to parse text */
1367 - private Query startParsing(){
1368 - reset();
 1467+
 1468+ /** Init parsing, call this function to parse text */
 1469+ private Query startParsing() {
 1470+ reset();
13691471 return parseClause(0);
13701472 }
1371 -
1372 - /**
 1473+
 1474+ /**
13731475 * Simple parse on one default field, no rewrites.
13741476 *
13751477 * @param queryText
13761478 * @return
13771479 */
1378 - public Query parseRaw(String queryText){
 1480+ public Query parseRaw(String queryText) {
13791481 queryText = extractPrefixFilter(queryText);
1380 - if(queryText.trim().length()==0 && hasPrefixFilters())
 1482+ if (queryText.trim().length() == 0 && hasPrefixFilters())
13811483 return new MatchAllTitlesQuery(fields.title());
1382 - queryLength = queryText.length();
 1484+ queryLength = queryText.length();
13831485 text = queryText.toCharArray();
1384 -
 1486+
13851487 Query query = null;
13861488 query = startParsing();
1387 -
1388 - return query;
 1489+
 1490+ return query;
13891491 }
13901492
13911493 /* ======================= FULL-QUERY PARSING ========================= */
1392 -
 1494+
13931495 public static class ParsingOptions {
13941496 /** use a custom namespace-transformation policy */
13951497 NamespacePolicy policy = null;
1396 - /** only parse the main query (on contents and title) without relevance stuff */
 1498+ /**
 1499+ * only parse the main query (on contents and title) without relevance
 1500+ * stuff
 1501+ */
13971502 boolean coreQueryOnly = false;
13981503 /** interface to fetch wildcard hits */
13991504 Wildcards wildcards = null;
14001505 /** fuzzy queries interface */
14011506 Fuzzy fuzzy = null;
1402 -
1403 - public ParsingOptions() {}
1404 - public ParsingOptions(NamespacePolicy policy){
 1507+
 1508+ public ParsingOptions() {
 1509+ }
 1510+
 1511+ public ParsingOptions(NamespacePolicy policy) {
14051512 this.policy = policy;
14061513 }
1407 - public ParsingOptions(boolean coreQueryOnly){
 1514+
 1515+ public ParsingOptions(boolean coreQueryOnly) {
14081516 this.coreQueryOnly = coreQueryOnly;
14091517 }
1410 - public ParsingOptions(Wildcards wildcards){
 1518+
 1519+ public ParsingOptions(Wildcards wildcards) {
14111520 this.wildcards = wildcards;
14121521 }
1413 - public ParsingOptions(NamespacePolicy policy, Wildcards wildcards, Fuzzy fuzzy){
 1522+
 1523+ public ParsingOptions(NamespacePolicy policy, Wildcards wildcards,
 1524+ Fuzzy fuzzy) {
14141525 this.policy = policy;
14151526 this.wildcards = wildcards;
14161527 this.fuzzy = fuzzy;
14171528 }
14181529 }
1419 -
 1530+
14201531 /** Parse a full query with default options */
1421 - public Query parse(String queryText){
1422 - return parse(queryText,new ParsingOptions());
 1532+ public Query parse(String queryText) {
 1533+ return parse(queryText, new ParsingOptions());
14231534 }
1424 -
 1535+
14251536 /**
14261537 * Construct a full query on all the fields in the index from search text
1427 - *
14281538 */
14291539 @SuppressWarnings("unchecked")
1430 - public Query parse(String queryText, ParsingOptions options){
 1540+ public Query parse(String queryText, ParsingOptions options) {
14311541 this.wildcards = options.wildcards;
14321542 this.fuzzy = options.fuzzy;
14331543 queryText = quoteCJK(queryText);
14341544 NamespacePolicy defaultPolicy = this.namespacePolicy;
1435 - if(options.policy != null)
1436 - this.namespacePolicy = options.policy;
 1545+ if (options.policy != null)
 1546+ this.namespacePolicy = options.policy;
14371547 defaultBoost = CONTENTS_BOOST;
14381548 defaultAliasBoost = ALIAS_BOOST;
1439 -
1440 - this.rawFields = new HashMap<String,String>();
 1549+
 1550+ this.rawFields = new HashMap<String, String>();
14411551 // parse out raw queries
1442 - for(String field : new String[] {"ondiscussionpage:"}){
 1552+ for (String field : new String[] { "ondiscussionpage:" }) {
14431553 String[] ret = extractRawField(queryText, field);
14441554 queryText = ret[0];
1445 - if( ret[1] != null )
1446 - this.rawFields.put(field,ret[1]);
 1555+ if (ret[1] != null)
 1556+ this.rawFields.put(field, ret[1]);
14471557 }
1448 -
1449 -
1450 - Query qc = parseRaw(queryText);
 1558+
 1559+ Query qc = parseRaw(queryText);
14511560 ParsedWords words = parsedWords;
14521561 this.namespacePolicy = defaultPolicy;
1453 - if(qc == null) // empty
 1562+ if (qc == null) // empty
14541563 return null;
1455 -
1456 - highlightTerms = extractHighlightTerms(qc);
1457 -
1458 - if(options.coreQueryOnly || words.words.size()==0)
 1564+
 1565+ highlightTerms = extractHighlightTerms(qc);
 1566+
 1567+ if (options.coreQueryOnly || words.words.size() == 0)
14591568 return qc;
1460 -
 1569+
14611570 ParsedWords nostopWords = filterStopWords(words);
1462 -
 1571+
14631572 // main phrase combined with relevance metrics
1464 - Query mainPhrase = makeMainPhraseWithRelevance(words,nostopWords);
1465 - if(mainPhrase == null)
 1573+ Query mainPhrase = makeMainPhraseWithRelevance(words, nostopWords);
 1574+ if (mainPhrase == null)
14661575 return qc;
14671576
14681577 // additional queries
1469 - //Query related = new LogTransformScore(makeRelatedRelevance(words,ADD_RELATED_BOOST));
 1578+ // Query related = new
 1579+ // LogTransformScore(makeRelatedRelevance(words,ADD_RELATED_BOOST));
14701580 // Query related = makeRelatedRelevance(words,ADD_RELATED_BOOST);
1471 -
 1581+
14721582 // mainphrase + related
1473 - /* BooleanQuery additional = new BooleanQuery(true);
1474 - additional.add(mainPhrase,Occur.MUST);
1475 - if(related != null)
1476 - additional.add(related,Occur.SHOULD); */
1477 -
1478 - /* BooleanQuery full = new BooleanQuery(true);
1479 - full.add(bq,Occur.MUST);
1480 - full.add(additional,Occur.SHOULD); */
1481 -
 1583+ /*
 1584+ * BooleanQuery additional = new BooleanQuery(true);
 1585+ * additional.add(mainPhrase,Occur.MUST);
 1586+ * if(related != null)
 1587+ * additional.add(related,Occur.SHOULD);
 1588+ */
 1589+
 1590+ /*
 1591+ * BooleanQuery full = new BooleanQuery(true);
 1592+ * full.add(bq,Occur.MUST);
 1593+ * full.add(additional,Occur.SHOULD);
 1594+ */
 1595+
14821596 // redirect match (when redirect is not contained in contents or title)
1483 - Query redirectMatch = makeAlttitleForRedirectsMulti(makeFirstAndSingular(words),20,1f);
1484 -
 1597+ Query redirectMatch = makeAlttitleForRedirectsMulti(
 1598+ makeFirstAndSingular(words), 20, 1f);
 1599+
14851600 BooleanQuery full = new BooleanQuery(true);
14861601 full.add(qc, Occur.MUST);
1487 - if(mainPhrase != null)
 1602+ if (mainPhrase != null)
14881603 full.add(mainPhrase, Occur.SHOULD);
1489 - if(redirectMatch != null)
 1604+ if (redirectMatch != null)
14901605 full.add(redirectMatch, Occur.SHOULD);
1491 -
 1606+
14921607 // add raw fields as global constraints
1493 - for(Entry<String,String> e : rawFields.entrySet()){
 1608+ for (Entry<String, String> e : rawFields.entrySet()) {
14941609 String field = e.getKey();
1495 - if(field.endsWith(":"))
1496 - field = field.substring(0, field.length()-1);
1497 - // find target field in the index, e.g. ondiscussionpage -> ThreadPage
 1610+ if (field.endsWith(":"))
 1611+ field = field.substring(0, field.length() - 1);
 1612+ // find target field in the index, e.g. ondiscussionpage ->
 1613+ // ThreadPage
14981614 String targetField = keywordFieldMapping.get(field);
1499 - if( targetField != null)
1500 - full.add(new TermQuery(new Term(targetField, e.getValue())),Occur.MUST);
 1615+ if (targetField != null)
 1616+ full.add(new TermQuery(new Term(targetField, e.getValue())),
 1617+ Occur.MUST);
15011618 }
1502 -
1503 - // init global scaling of articles
 1619+
 1620+ // init global scaling of articles
15041621 ArticleScaling scale = new ArticleScaling.None();
15051622 // based on age
15061623 AgeScaling age = iid.getAgeScaling();
1507 - if(age != AgeScaling.NONE){
1508 - switch(age){
1509 - case STRONG: scale = new ArticleScaling.StepScale(0.3f,1); break;
1510 - case MEDIUM: scale = new ArticleScaling.StepScale(0.6f,1); break;
1511 - case WEAK: scale = new ArticleScaling.StepScale(0.9f,1); break;
1512 - default: throw new RuntimeException("Unsupported age scaling "+age);
1513 - }
1514 -
 1624+ if (age != AgeScaling.NONE) {
 1625+ switch (age) {
 1626+ case STRONG:
 1627+ scale = new ArticleScaling.StepScale(0.3f, 1);
 1628+ break;
 1629+ case MEDIUM:
 1630+ scale = new ArticleScaling.StepScale(0.6f, 1);
 1631+ break;
 1632+ case WEAK:
 1633+ scale = new ArticleScaling.StepScale(0.9f, 1);
 1634+ break;
 1635+ default:
 1636+ throw new RuntimeException("Unsupported age scaling " + age);
 1637+ }
 1638+
15151639 }
1516 -
 1640+
15171641 // additional rank
1518 - AggregateInfo rank = iid.useAdditionalRank()? new AggregateInfoImpl() : null;
 1642+ AggregateInfo rank = iid.useAdditionalRank() ? new AggregateInfoImpl()
 1643+ : null;
15191644 ArticleNamespaceScaling nsScale = iid.getNamespaceScaling();
1520 - return new ArticleQueryWrap(full,new ArticleInfoImpl(),scale,rank,nsScale);
1521 -
 1645+ return new ArticleQueryWrap(full, new ArticleInfoImpl(), scale, rank,
 1646+ nsScale);
 1647+
15221648 }
1523 -
 1649+
15241650 /** Return terms that should be highlighted in snippets */
15251651 private Term[] extractHighlightTerms(Query query) {
15261652 HashSet<Term> terms = new HashSet<Term>();
15271653 query.extractTerms(terms);
1528 -
 1654+
15291655 // subtract forbidden terms
15301656 BooleanQuery forbidden = extractForbidden(query);
1531 - if(forbidden != null){
 1657+ if (forbidden != null) {
15321658 HashSet<Term> forbiddenTerms = new HashSet<Term>();
15331659 forbidden.extractTerms(forbiddenTerms);
15341660 terms.removeAll(forbiddenTerms);
@@ -1536,18 +1662,19 @@
15371663 }
15381664
15391665 /** Generate singular parsed words coupled with first() words */
1540 - private ParsedWords makeFirstAndSingular(ParsedWords words){
 1666+ private ParsedWords makeFirstAndSingular(ParsedWords words) {
15411667 ParsedWords ret = words.cloneFirstWithWildcards();
1542 - if(filters.hasSingular()){
 1668+ if (filters.hasSingular()) {
15431669 Singular singular = filters.getSingular();
15441670 // generate singular forms if any
1545 - for(WordsDesc wd : ret.words){
1546 - if(wd.isWildcardOrFuzzy())
 1671+ for (WordsDesc wd : ret.words) {
 1672+ if (wd.isWildcardOrFuzzy())
15471673 continue;
15481674 String w = wd.first();
15491675 String sw = singular.getSingular(w);
1550 - if( sw!=null && !w.equals(sw) ){
1551 - wd.add( new WordBoost( sw, wd.firstWordBoost().boost * SINGULAR_WORD_BOOST ) );
 1676+ if (sw != null && !w.equals(sw)) {
 1677+ wd.add(new WordBoost(sw, wd.firstWordBoost().boost
 1678+ * SINGULAR_WORD_BOOST));
15521679 }
15531680 }
15541681 }
@@ -1556,7 +1683,7 @@
15571684
15581685 private ArrayList<String> cleanupWords(ArrayList<String> words) {
15591686 ArrayList<String> ret = new ArrayList<String>();
1560 - for(String w : words){
 1687+ for (String w : words) {
15611688 ret.add(FastWikiTokenizerEngine.clearTrailing(w));
15621689 }
15631690 return ret;
@@ -1564,85 +1691,89 @@
15651692
15661693 /** Recursively traverse queries and put stop words to SHOULD */
15671694 private void filterStopWords(BooleanQuery bq) {
1568 - if(stopWords==null && stopWords.size()==0)
 1695+ if (stopWords == null || stopWords.size() == 0)
15691696 return;
1570 - for(BooleanClause cl : bq.getClauses()){
 1697+ for (BooleanClause cl : bq.getClauses()) {
15711698 Query q = cl.getQuery();
15721699 Occur o = cl.getOccur();
1573 - if(q instanceof BooleanQuery){
1574 - filterStopWords((BooleanQuery)q);
1575 - } else if(q instanceof TermQuery && o.equals(Occur.MUST)
1576 - && stopWords.contains(((TermQuery)q).getTerm().text())){
 1700+ if (q instanceof BooleanQuery) {
 1701+ filterStopWords((BooleanQuery) q);
 1702+ } else if (q instanceof TermQuery && o.equals(Occur.MUST)
 1703+ && stopWords.contains(((TermQuery) q).getTerm().text())) {
15771704 cl.setOccur(Occur.SHOULD);
15781705 }
15791706 }
15801707 }
1581 -
 1708+
15821709 /** @return new ParsedWords with stop words deleted */
1583 - private ParsedWords filterStopWords(ParsedWords words){
 1710+ private ParsedWords filterStopWords(ParsedWords words) {
15841711 // if all stop words, don't filter
15851712 boolean allStop = true;
1586 - for(WordsDesc d : words.words){
1587 - if(!stopWords.contains(d.first())){
 1713+ for (WordsDesc d : words.words) {
 1714+ if (!stopWords.contains(d.first())) {
15881715 allStop = false;
15891716 break;
15901717 }
15911718 }
15921719 ParsedWords ret = new ParsedWords();
1593 - for(WordsDesc d : words.words){
1594 - if(allStop || !stopWords.contains(d.first()))
 1720+ for (WordsDesc d : words.words) {
 1721+ if (allStop || !stopWords.contains(d.first()))
15951722 ret.words.add(d);
15961723 }
15971724 return ret;
15981725 }
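The same "drop stop words unless every word is a stop word" rule, sketched over plain strings; the stop-word set here is an arbitrary example, not the list the parser actually loads:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

public class StopWordFilterSketch {
    static List<String> filterStopWords(List<String> words, HashSet<String> stopWords) {
        // if every word is a stop word, keep them all
        boolean allStop = true;
        for (String w : words) {
            if (!stopWords.contains(w)) {
                allStop = false;
                break;
            }
        }
        ArrayList<String> ret = new ArrayList<String>();
        for (String w : words) {
            if (allStop || !stopWords.contains(w))
                ret.add(w);
        }
        return ret;
    }

    public static void main(String[] args) {
        HashSet<String> stop = new HashSet<String>(Arrays.asList("the", "of", "to"));
        System.out.println(filterStopWords(Arrays.asList("history", "of", "rome"), stop));
        // prints: [history, rome]
        System.out.println(filterStopWords(Arrays.asList("to", "the"), stop));
        // prints: [to, the]   (all stop words, so nothing is dropped)
    }
}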
15991726
16001727 /** Quote CJK chars to avoid frequency-based analysis */
1601 - protected String quoteCJK(String queryText){
1602 - if(!builder.filters.isUsingCJK())
 1728+ protected String quoteCJK(String queryText) {
 1729+ if (!builder.filters.isUsingCJK())
16031730 return queryText;
1604 -
 1731+
16051732 StringBuilder sb = new StringBuilder();
16061733 int c;
16071734 boolean prevCJK = false;
16081735 int offset = 0;
16091736 boolean closeQuote = false;
16101737 boolean inQuotes = false;
1611 - for(int i=0;i<queryText.length();i++){
 1738+ for (int i = 0; i < queryText.length(); i++) {
16121739 c = queryText.codePointAt(i);
1613 - if(c == '"') inQuotes = !inQuotes;
1614 - if(inQuotes)
 1740+ if (c == '"')
 1741+ inQuotes = !inQuotes;
 1742+ if (inQuotes)
16151743 continue;
1616 - if(CJKFilter.isCJKChar(c)){
1617 - if(!prevCJK){ // begin of CJK stream
1618 - if(i!=0)
1619 - sb.append(queryText.substring(offset,i));
 1744+ if (CJKFilter.isCJKChar(c)) {
 1745+ if (!prevCJK) { // beginning of CJK stream
 1746+ if (i != 0)
 1747+ sb.append(queryText.substring(offset, i));
16201748 offset = i;
16211749 sb.append('"');
16221750 closeQuote = true;
16231751 prevCJK = true;
16241752 }
1625 - } else if(prevCJK){
 1753+ } else if (prevCJK) {
16261754 // end of CJK stream
1627 - sb.append(queryText.substring(offset,i));
 1755+ sb.append(queryText.substring(offset, i));
16281756 offset = i;
16291757 sb.append('"');
16301758 closeQuote = true;
16311759 prevCJK = false;
16321760 }
16331761 }
1634 - if(offset == 0 && !closeQuote)
 1762+ if (offset == 0 && !closeQuote)
16351763 return queryText;
1636 - else{
1637 - sb.append(queryText.substring(offset,queryText.length()));
1638 - if(closeQuote)
 1764+ else {
 1765+ sb.append(queryText.substring(offset, queryText.length()));
 1766+ if (closeQuote)
16391767 sb.append('"');
16401768 return sb.toString();
16411769 }
16421770 }
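The effect of this method is to wrap each uninterrupted run of CJK characters in double quotes so the run is later parsed as a phrase rather than as individual terms. A rough standalone sketch; Character.isIdeographic is only a stand-in for CJKFilter.isCJKChar (it does not cover kana or hangul), and the handling of text already inside quotes is omitted:

public class CjkQuoteSketch {
    // Wrap each maximal run of CJK characters in double quotes.
    static String quoteCjkRuns(String text) {
        StringBuilder sb = new StringBuilder();
        boolean inRun = false;
        for (int i = 0; i < text.length(); i++) {
            int c = text.codePointAt(i);
            // stand-in for CJKFilter.isCJKChar; does not cover kana/hangul
            boolean cjk = Character.isIdeographic(c);
            if (cjk && !inRun) {
                sb.append('"');
                inRun = true;
            } else if (!cjk && inRun) {
                sb.append('"');
                inRun = false;
            }
            sb.appendCodePoint(c);
        }
        if (inRun)
            sb.append('"');
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.println(quoteCjkRuns("wikipedia 中文 search"));
        // prints: wikipedia "中文" search
    }
}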
1643 -
1644 - /** Make title query in format: title:query stemtitle:stemmedquery
1645 - * Also extract words from query (to be used for phrases additional scores)
1646 - * @return query */
 1771+
 1772+ /**
 1773+ * Make title query in format: title:query stemtitle:stemmedquery
 1774+ * Also extract words from query (to be used for phrases additional scores)
 1775+ *
 1776+ * @return query
 1777+ */
16471778 protected Query makeTitlePart(String queryText) {
16481779 // push on stack
16491780 String contentField = defaultField;
@@ -1650,437 +1781,488 @@
16511782
16521783 // stemmed title
16531784 Query qs = null;
1654 - if(ADD_STEM_TITLE && builder.getFilters().hasStemmer()){
1655 - defaultField = fields.stemtitle();
 1785+ if (ADD_STEM_TITLE && builder.getFilters().hasStemmer()) {
 1786+ defaultField = fields.stemtitle();
16561787 defaultBoost = STEM_TITLE_BOOST;
16571788 defaultAliasBoost = STEM_TITLE_ALIAS_BOOST;
16581789 qs = parseRaw(queryText);
16591790 }
16601791 // title
1661 - defaultField = fields.title();
1662 - defaultBoost = (qs!= null)? TITLE_BOOST : TITLE_BOOST+STEM_TITLE_BOOST;
1663 - defaultAliasBoost = TITLE_ALIAS_BOOST;
 1792+ defaultField = fields.title();
 1793+ defaultBoost = (qs != null) ? TITLE_BOOST : TITLE_BOOST
 1794+ + STEM_TITLE_BOOST;
 1795+ defaultAliasBoost = TITLE_ALIAS_BOOST;
16641796 Query qt = parseRaw(queryText);
1665 -
 1797+
16661798 // pop stack
16671799 defaultField = contentField;
16681800 defaultBoost = olfDefaultBoost;
16691801 defaultAliasBoost = ALIAS_BOOST;
16701802
1671 -
1672 - if(qt==qs || qt.equals(qs)) // either null, or category query
 1803+ if (qt == qs || qt.equals(qs)) // either null, or category query
16731804 return qt;
1674 - if(qt == null)
 1805+ if (qt == null)
16751806 return qs;
1676 - if(qs == null)
 1807+ if (qs == null)
16771808 return qt;
16781809 BooleanQuery bq = new BooleanQuery(true);
1679 - bq.add(qt,Occur.SHOULD);
1680 - bq.add(qs,Occur.SHOULD);
 1810+ bq.add(qt, Occur.SHOULD);
 1811+ bq.add(qs, Occur.SHOULD);
16811812 return bq;
16821813 }
1683 -
 1814+
16841815 /** Extract MUST_NOT clauses from a query */
1685 - protected static BooleanQuery extractForbidden(Query q){
 1816+ protected static BooleanQuery extractForbidden(Query q) {
16861817 BooleanQuery bq = new BooleanQuery();
1687 - extractForbiddenRecursive(bq,q);
1688 - if(bq.getClauses().length == 0)
 1818+ extractForbiddenRecursive(bq, q);
 1819+ if (bq.getClauses().length == 0)
16891820 return null;
1690 -
 1821+
16911822 return bq;
16921823 }
1693 - /** Recursivily extract all MUST_NOT clauses from query */
1694 - protected static void extractForbiddenRecursive(BooleanQuery forbidden, Query q){
1695 - if(q instanceof BooleanQuery){
1696 - BooleanQuery bq = (BooleanQuery)q;
1697 - for(BooleanClause cl : bq.getClauses()){
1698 - if(cl.getOccur() == Occur.MUST_NOT)
1699 - forbidden.add(cl.getQuery(),Occur.SHOULD);
 1824+
 1825+ /** Recursively extract all MUST_NOT clauses from a query */
 1826+ protected static void extractForbiddenRecursive(BooleanQuery forbidden,
 1827+ Query q) {
 1828+ if (q instanceof BooleanQuery) {
 1829+ BooleanQuery bq = (BooleanQuery) q;
 1830+ for (BooleanClause cl : bq.getClauses()) {
 1831+ if (cl.getOccur() == Occur.MUST_NOT)
 1832+ forbidden.add(cl.getQuery(), Occur.SHOULD);
17001833 else
1701 - extractForbiddenRecursive(forbidden,cl.getQuery());
 1834+ extractForbiddenRecursive(forbidden, cl.getQuery());
17021835 }
17031836 }
17041837 }
1705 - /** Extract forbidden terms from a query into a hashset */
1706 - public static void extractForbiddenInto(Query q, HashSet<Term> forbidden){
 1838+
 1839+ /** Extract forbidden terms from a query into a hashset */
 1840+ public static void extractForbiddenInto(Query q, HashSet<Term> forbidden) {
17071841 BooleanQuery bq = extractForbidden(q);
1708 - if(bq != null)
 1842+ if (bq != null)
17091843 bq.extractTerms(forbidden);
17101844 }
1711 -
 1845+
17121846 /** Valid after parse(), returns if the last query had phrases in it */
1713 - public boolean hasPhrases(){
1714 - for(WordsDesc wd : parsedWords.words){
1715 - if(wd.type == ExpandedType.PHRASE)
 1847+ public boolean hasPhrases() {
 1848+ for (WordsDesc wd : parsedWords.words) {
 1849+ if (wd.type == ExpandedType.PHRASE)
17161850 return true;
17171851 }
17181852 return false;
17191853 }
1720 -
1721 - /** Make the main phrases with relevance metrics */
1722 - protected Query makeMainPhraseWithRelevance(ParsedWords words, ParsedWords noStopWords){
 1854+
 1855+ /** Make the main phrases with relevance metrics */
 1856+ protected Query makeMainPhraseWithRelevance(ParsedWords words,
 1857+ ParsedWords noStopWords) {
17231858 Query main = null;
17241859 String field = fields.contents(); // put to begin() for performance
1725 -
 1860+
17261861 // all words as entered into the query
1727 - Query phrase = makePositionalMulti(noStopWords,field,new PositionalOptions.Sloppy(),MAINPHRASE_SLOP,1);
1728 -
1729 - Query sections = makeSectionsQuery(noStopWords,SECTIONS_BOOST);
 1862+ Query phrase = makePositionalMulti(noStopWords, field,
 1863+ new PositionalOptions.Sloppy(), MAINPHRASE_SLOP, 1);
 1864+
 1865+ Query sections = makeSectionsQuery(noStopWords, SECTIONS_BOOST);
17301866 // wordnet synonyms
1731 - ArrayList<ArrayList<String>> wordnet = WordNet.replaceOne(words.extractFirst(),iid.getLangCode());
1732 -
 1867+ ArrayList<ArrayList<String>> wordnet = WordNet.replaceOne(
 1868+ words.extractFirst(), iid.getLangCode());
 1869+
17331870 BooleanQuery combined = new BooleanQuery(true);
1734 - // combined various queries into mainphrase
1735 - if(phrase != null){
1736 - combined.add(phrase,Occur.SHOULD);
1737 - // wordnet
1738 - if(wordnet != null){
1739 - for(ArrayList<String> wnwords : wordnet){
1740 - if(!allStopWords(wnwords))
1741 - combined.add(makePositional(wnwords,field,new PositionalOptions.Sloppy(),MAINPHRASE_SLOP,1),Occur.SHOULD);
 1871+ // combined various queries into mainphrase
 1872+ if (phrase != null) {
 1872+ // combine the various queries into the main phrase
 1874+ // wordnet
 1875+ if (wordnet != null) {
 1876+ for (ArrayList<String> wnwords : wordnet) {
 1877+ if (!allStopWords(wnwords))
 1878+ combined.add(
 1879+ makePositional(wnwords, field,
 1880+ new PositionalOptions.Sloppy(),
 1881+ MAINPHRASE_SLOP, 1), Occur.SHOULD);
17421882 }
17431883 }
17441884 // urls
1745 - if(urls.size() > 0){
1746 - for(ArrayList<Term> terms : urls){
1747 - combined.add(makePositional(extractTermText(terms), extractField(terms), new PositionalOptions.Sloppy(),0,1), Occur.SHOULD);
 1885+ if (urls.size() > 0) {
 1886+ for (ArrayList<Term> terms : urls) {
 1887+ combined.add(
 1888+ makePositional(extractTermText(terms),
 1889+ extractField(terms),
 1890+ new PositionalOptions.Sloppy(), 0, 1),
 1891+ Occur.SHOULD);
17481892 }
17491893 }
17501894 }
1751 - if(sections!=null)
1752 - combined.add(sections,Occur.SHOULD);
1753 -
1754 - if(combined.getClauses().length == 1)
 1895+ if (sections != null)
 1896+ combined.add(sections, Occur.SHOULD);
 1897+
 1898+ if (combined.getClauses().length == 1)
17551899 main = combined.getClauses()[0].getQuery();
17561900 else
17571901 main = combined;
1758 -
1759 -
 1902+
17601903 main.setBoost(MAINPHRASE_BOOST);
1761 -
 1904+
17621905 // relevance: alttitle
1763 - Query alttitle = makeAlttitleRelevance(words,RELEVANCE_ALTTITLE_BOOST);
 1906+ Query alttitle = makeAlttitleRelevance(words, RELEVANCE_ALTTITLE_BOOST);
17641907 ArrayList<Query> altAdd = new ArrayList<Query>();
1765 - if(wordnet!=null)
1766 - for(ArrayList<String> wnwords : wordnet)
1767 - if(!allStopWords(wnwords))
1768 - altAdd.add(makeAlttitleRelevance(wnwords,RELEVANCE_ALTTITLE_BOOST));
1769 - alttitle = simplify(combine(alttitle,altAdd));
1770 -
 1908+ if (wordnet != null)
 1909+ for (ArrayList<String> wnwords : wordnet)
 1910+ if (!allStopWords(wnwords))
 1911+ altAdd.add(makeAlttitleRelevance(wnwords,
 1912+ RELEVANCE_ALTTITLE_BOOST));
 1913+ alttitle = simplify(combine(alttitle, altAdd));
 1914+
17711915 // relevance: related
1772 - Query related = makeRelatedRelevance(words,RELEVANCE_RELATED_BOOST);
 1916+ Query related = makeRelatedRelevance(words, RELEVANCE_RELATED_BOOST);
17731917 ArrayList<Query> relAdd = new ArrayList<Query>();
1774 - if(wordnet!=null)
1775 - for(ArrayList<String> wnwords : wordnet)
1776 - if(!allStopWords(wnwords))
1777 - relAdd.add(makeRelatedRelevance(wnwords,RELEVANCE_RELATED_BOOST));
1778 - related = simplify(combine(related,relAdd));
1779 -
 1918+ if (wordnet != null)
 1919+ for (ArrayList<String> wnwords : wordnet)
 1920+ if (!allStopWords(wnwords))
 1921+ relAdd.add(makeRelatedRelevance(wnwords,
 1922+ RELEVANCE_RELATED_BOOST));
 1923+ related = simplify(combine(related, relAdd));
 1924+
17801925 BooleanQuery relevances = new BooleanQuery(true);
1781 - relevances.add(alttitle,Occur.SHOULD);
1782 - relevances.add(related,Occur.SHOULD);
1783 -
 1926+ relevances.add(alttitle, Occur.SHOULD);
 1927+ relevances.add(related, Occur.SHOULD);
 1928+
17841929 RelevanceQuery whole = new RelevanceQuery(main);
17851930 whole.addRelevanceMeasure(relevances);
1786 -
 1931+
17871932 return whole;
17881933 }
1789 -
 1934+
17901935 private String extractField(ArrayList<Term> terms) {
1791 - if(terms.size() > 0)
 1936+ if (terms.size() > 0)
17921937 return terms.get(0).field();
17931938 else
1794 - throw new RuntimeException("Trying to extract field from zero-length list of terms");
 1939+ throw new RuntimeException(
 1940+ "Trying to extract field from zero-length list of terms");
17951941 }
17961942
17971943 private ArrayList<String> extractTermText(ArrayList<Term> terms) {
17981944 ArrayList<String> tt = new ArrayList<String>();
1799 - for(Term t : terms)
 1945+ for (Term t : terms)
18001946 tt.add(t.text());
18011947 return tt;
18021948 }
18031949
1804 - /** Combine one main query with a number of other queries into a boolean query */
 1950+ /**
 1951+ * Combine one main query with a number of other queries into a boolean
 1952+ * query
 1953+ */
18051954 private Query combine(Query query, ArrayList<Query> additional) {
1806 - if(additional.size()==0)
 1955+ if (additional.size() == 0)
18071956 return query;
18081957 BooleanQuery bq = new BooleanQuery(true);
1809 - bq.add(query,Occur.SHOULD);
1810 - for(Query q : additional){
1811 - if(q != null)
1812 - bq.add(q,Occur.SHOULD);
 1958+ bq.add(query, Occur.SHOULD);
 1959+ for (Query q : additional) {
 1960+ if (q != null)
 1961+ bq.add(q, Occur.SHOULD);
18131962 }
1814 - if(bq.clauses().size()==1)
 1963+ if (bq.clauses().size() == 1)
18151964 return query;
18161965 return bq;
1817 - }
1818 -
 1966+ }
 1967+
18191968 /** Convert multiple OR-like queries into one with larger boost */
1820 - protected Query simplify(Query q){
1821 - if(q instanceof BooleanQuery){
1822 - BooleanQuery bq = (BooleanQuery)q;
1823 - if(!allShould(bq))
 1969+ protected Query simplify(Query q) {
 1970+ if (q instanceof BooleanQuery) {
 1971+ BooleanQuery bq = (BooleanQuery) q;
 1972+ if (!allShould(bq))
18241973 return q;
18251974 // query -> boost
1826 - HashMap<Query,Float> map = new HashMap<Query,Float>();
1827 - extractAndSimplify(bq,map,1);
1828 -
 1975+ HashMap<Query, Float> map = new HashMap<Query, Float>();
 1976+ extractAndSimplify(bq, map, 1);
 1977+
18291978 // simplify
18301979 BooleanQuery ret = new BooleanQuery(true);
1831 - for(Entry<Query,Float> e : map.entrySet()){
 1980+ for (Entry<Query, Float> e : map.entrySet()) {
18321981 Query qt = (Query) e.getKey();
18331982 qt.setBoost(e.getValue());
1834 - ret.add(qt,Occur.SHOULD);
 1983+ ret.add(qt, Occur.SHOULD);
18351984 }
18361985 return ret;
18371986 }
18381987 return q;
18391988 }
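extractAndSimplify below relies on Query equality and hashing: structurally identical queries reached through different SHOULD branches share one HashMap key, so their boost contributions add up and the tree collapses to a single clause per distinct query. A small illustration with the pre-4.0 Lucene classes; the field name and branch boosts are made up for the example:

import java.util.HashMap;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class SimplifySketch {
    public static void main(String[] args) {
        // Two structurally identical term queries (same term, same boost)
        // arriving from different SHOULD branches share one map key,
        // so their accumulated weights are summed.
        Query a = new TermQuery(new Term("alttitle", "rome"));
        Query b = new TermQuery(new Term("alttitle", "rome"));

        HashMap<Query, Float> map = new HashMap<Query, Float>();
        Query[] queries = { a, b };
        float[] branchBoosts = { 0.5f, 0.25f }; // made-up parent boosts
        for (int i = 0; i < queries.length; i++) {
            Float prev = map.get(queries[i]);
            float sum = (prev == null ? 0f : prev.floatValue()) + branchBoosts[i];
            map.put(queries[i], sum);
        }
        System.out.println(map.size()); // 1
        System.out.println(map.get(a)); // 0.75
    }
}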
1840 -
1841 - private boolean allShould(BooleanQuery bq){
1842 - for(BooleanClause cl : bq.getClauses()){
1843 - if(!cl.getOccur().equals(Occur.SHOULD))
 1989+
 1990+ private boolean allShould(BooleanQuery bq) {
 1991+ for (BooleanClause cl : bq.getClauses()) {
 1992+ if (!cl.getOccur().equals(Occur.SHOULD))
18441993 return false;
1845 - if(cl.getQuery() instanceof BooleanQuery){
1846 - if(!allShould((BooleanQuery)cl.getQuery()))
 1994+ if (cl.getQuery() instanceof BooleanQuery) {
 1995+ if (!allShould((BooleanQuery) cl.getQuery()))
18471996 return false;
18481997 }
18491998 }
18501999 return true;
18512000 }
1852 -
1853 - private void extractAndSimplify(BooleanQuery bq, HashMap<Query,Float> map, float parentBoost){
1854 - for(BooleanClause cl : bq.getClauses()){
 2001+
 2002+ private void extractAndSimplify(BooleanQuery bq, HashMap<Query, Float> map,
 2003+ float parentBoost) {
 2004+ for (BooleanClause cl : bq.getClauses()) {
18552005 Query q = cl.getQuery();
1856 - if(q instanceof BooleanQuery)
1857 - extractAndSimplify((BooleanQuery)q,map,parentBoost*bq.getBoost());
1858 - else{
 2006+ if (q instanceof BooleanQuery)
 2007+ extractAndSimplify((BooleanQuery) q, map,
 2008+ parentBoost * bq.getBoost());
 2009+ else {
18592010 Float boost = map.get(q);
1860 - float b = boost==null? 0 : boost;
1861 - b += q.getBoost()*bq.getBoost()*parentBoost;
1862 - map.put(q,b);
 2011+ float b = boost == null ? 0 : boost;
 2012+ b += q.getBoost() * bq.getBoost() * parentBoost;
 2013+ map.put(q, b);
18632014 }
18642015 }
18652016 }
1866 -
 2017+
18672018 /** Make positional query by including all of the stop words */
1868 - protected PositionalQuery makePositional(ArrayList<String> words, String field, PositionalOptions options, int slop, float boost){
1869 - return makePositional(words,field,options,slop,boost,true);
 2019+ protected PositionalQuery makePositional(ArrayList<String> words,
 2020+ String field, PositionalOptions options, int slop, float boost) {
 2021+ return makePositional(words, field, options, slop, boost, true);
18702022 }
1871 -
 2023+
18722024 /** Make generic positional query */
1873 - protected PositionalQuery makePositional(ArrayList<String> words, String field, PositionalOptions options, int slop, float boost, boolean includeStopWords){
 2025+ protected PositionalQuery makePositional(ArrayList<String> words,
 2026+ String field, PositionalOptions options, int slop, float boost,
 2027+ boolean includeStopWords) {
18742028 PositionalQuery pq = new PositionalQuery(options);
18752029 int pos = 0;
1876 - for(String w : words){
 2030+ for (String w : words) {
18772031 boolean isStop = stopWords.contains(w);
1878 - if(!(isStop && !includeStopWords))
1879 - pq.add(new Term(field,w),pos,isStop);
 2032+ if (!(isStop && !includeStopWords))
 2033+ pq.add(new Term(field, w), pos, isStop);
18802034 pos++;
18812035 }
1882 - if(slop != 0)
 2036+ if (slop != 0)
18832037 pq.setSlop(slop);
18842038 pq.setBoost(boost);
1885 - if(pq.getPositions().length > 0)
 2039+ if (pq.getPositions().length > 0)
18862040 return pq;
1887 - else return null;
 2041+ else
 2042+ return null;
18882043 }
1889 -
1890 - protected Query makePositionalMulti(ParsedWords parsed, String field, PositionalOptions options, int slop, float boost){
 2044+
 2045+ protected Query makePositionalMulti(ParsedWords parsed, String field,
 2046+ PositionalOptions options, int slop, float boost) {
18912047 PositionalMultiQuery mq = new PositionalMultiQuery(options);
1892 - for(WordsDesc wd : parsed.words){
1893 - mq.addWithBoost(wd.getTerms(field),wd.getPosition(),wd.getBoosts());
 2048+ for (WordsDesc wd : parsed.words) {
 2049+ mq.addWithBoost(wd.getTerms(field), wd.getPosition(),
 2050+ wd.getBoosts());
18942051 }
18952052 mq.setSlop(slop);
18962053 mq.setBoost(boost);
1897 - if(mq.getPositions().length > 0)
 2054+ if (mq.getPositions().length > 0)
18982055 return mq;
1899 - else
 2056+ else
19002057 return null;
19012058 }
19022059
19032060 /** Make query with short subphrases anchored in non-stop words */
1904 - protected Query makeAnchoredQuery(ArrayList<String> words, String field,
1905 - PositionalOptions options, PositionalOptions whole, PositionalOptions wholeSloppy,
1906 - float boost, int slop){
 2061+ protected Query makeAnchoredQuery(ArrayList<String> words, String field,
 2062+ PositionalOptions options, PositionalOptions whole,
 2063+ PositionalOptions wholeSloppy, float boost, int slop) {
19072064 BooleanQuery bq = new BooleanQuery(true);
1908 - if(words.size() == 1){
1909 - PositionalQuery pq = makePositional(words,field,options,0,1f);
1910 - bq.add(pq,Occur.SHOULD);
1911 - } else{
 2065+ if (words.size() == 1) {
 2066+ PositionalQuery pq = makePositional(words, field, options, 0, 1f);
 2067+ bq.add(pq, Occur.SHOULD);
 2068+ } else {
19122069 // add words
1913 - for(String w : words){
 2070+ for (String w : words) {
19142071 PositionalQuery pq = new PositionalQuery(options);
1915 - pq.add(new Term(field,w));
1916 - bq.add(pq,Occur.SHOULD);
 2072+ pq.add(new Term(field, w));
 2073+ bq.add(pq, Occur.SHOULD);
19172074 }
19182075 // phrases
1919 - int i =0;
 2076+ int i = 0;
19202077 ArrayList<String> phrase = new ArrayList<String>();
1921 - while(i < words.size()){
 2078+ while (i < words.size()) {
19222079 phrase.clear();
1923 - for(;i<words.size();i++){
 2080+ for (; i < words.size(); i++) {
19242081 String w = words.get(i);
1925 - if(phrase.size() == 0 || stopWords.contains(w))
 2082+ if (phrase.size() == 0 || stopWords.contains(w))
19262083 phrase.add(w);
1927 - else{
1928 - phrase.add(w);
 2084+ else {
 2085+ phrase.add(w);
19292086 break;
19302087 }
19312088 }
1932 - if(phrase.size() > 1)
1933 - bq.add(makePositional(phrase,field,options,0,phrase.size()),Occur.SHOULD);
 2089+ if (phrase.size() > 1)
 2090+ bq.add(makePositional(phrase, field, options, 0,
 2091+ phrase.size()), Occur.SHOULD);
19342092 }
19352093 }
19362094 // add the whole-only query
1937 - if(whole != null)
1938 - bq.add(makePositional(words,field,whole,slop,1),Occur.SHOULD);
1939 - if(wholeSloppy != null){
1940 - Query ws = makePositional(words,field,wholeSloppy,slop,1,false);
1941 - if(ws != null)
1942 - bq.add(ws,Occur.SHOULD);
 2095+ if (whole != null)
 2096+ bq.add(makePositional(words, field, whole, slop, 1), Occur.SHOULD);
 2097+ if (wholeSloppy != null) {
 2098+ Query ws = makePositional(words, field, wholeSloppy, slop, 1, false);
 2099+ if (ws != null)
 2100+ bq.add(ws, Occur.SHOULD);
19432101 }
19442102 bq.setBoost(boost);
1945 -
 2103+
19462104 return bq;
19472105 }
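The anchoring loop grows each subphrase from a non-stop word, keeps absorbing stop words, and closes the subphrase at the next non-stop word; only subphrases longer than one word get their own clause. A plain sketch of just that grouping, with an arbitrary example stop-word list:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

public class AnchoredSubphraseSketch {
    static List<List<String>> anchoredSubphrases(List<String> words, HashSet<String> stopWords) {
        List<List<String>> result = new ArrayList<List<String>>();
        int i = 0;
        while (i < words.size()) {
            ArrayList<String> phrase = new ArrayList<String>();
            for (; i < words.size(); i++) {
                String w = words.get(i);
                if (phrase.size() == 0 || stopWords.contains(w))
                    phrase.add(w);
                else {
                    phrase.add(w);
                    break; // closed at the next non-stop word; it anchors the next subphrase
                }
            }
            if (phrase.size() > 1)
                result.add(phrase);
        }
        return result;
    }

    public static void main(String[] args) {
        HashSet<String> stop = new HashSet<String>(Arrays.asList("of", "the", "and"));
        System.out.println(anchoredSubphrases(
                Arrays.asList("pirates", "of", "the", "caribbean", "film"), stop));
        // prints: [[pirates, of, the, caribbean], [caribbean, film]]
    }
}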
1948 -
 2106+
19492107 /** Make query with short subphrases anchored in non-stop words */
1950 - protected Query makeAnchoredQueryMulti(ParsedWords words, String field,
1951 - PositionalOptions options, PositionalOptions whole, int slopWhole, float boost){
 2108+ protected Query makeAnchoredQueryMulti(ParsedWords words, String field,
 2109+ PositionalOptions options, PositionalOptions whole, int slopWhole,
 2110+ float boost) {
19522111 BooleanQuery bq = new BooleanQuery(true);
19532112 // for a single word, only the whole-phrase query is made
1954 - if(words.size() >= 2){
 2113+ if (words.size() >= 2) {
19552114 // add single words
1956 - for(int i=0;i<words.size();i++){
1957 - if(!stopWords.contains(words.firstAt(i))) // skip single stop words
1958 - bq.add(makePositionalMulti(words.cloneSingleWord(i),field,options,0,1),Occur.SHOULD);
 2115+ for (int i = 0; i < words.size(); i++) {
 2116+ if (!stopWords.contains(words.firstAt(i))) // skip single stop
 2117+ // words
 2118+ bq.add(makePositionalMulti(words.cloneSingleWord(i), field,
 2119+ options, 0, 1), Occur.SHOULD);
19592120 }
19602121 // add two words to score higher two-word correlations
1961 - if(words.size() >= 3){
1962 - for(int i=0;i<words.size()-1;){
 2122+ if (words.size() >= 3) {
 2123+ for (int i = 0; i < words.size() - 1;) {
19632124 int i1 = i; // first word
19642125 int i2 = i1 + 1; // second non-stop word
1965 - for(; i2<words.size()-1; i2++){
1966 - if(!stopWords.contains(words.firstAt(i2)))
1967 - break;
 2126+ for (; i2 < words.size() - 1; i2++) {
 2127+ if (!stopWords.contains(words.firstAt(i2)))
 2128+ break;
19682129 }
1969 - bq.add(makePositionalMulti(words.cloneRange(i1,i2),field,options,10,2),Occur.SHOULD);
 2130+ bq.add(makePositionalMulti(words.cloneRange(i1, i2), field,
 2131+ options, 10, 2), Occur.SHOULD);
19702132 i = i2;
19712133 }
1972 - }
 2134+ }
19732135 }
19742136 // add the whole-only query
1975 - if(whole != null)
1976 - bq.add(makePositionalMulti(words,field,whole,slopWhole,1),Occur.SHOULD);
1977 -
 2137+ if (whole != null)
 2138+ bq.add(makePositionalMulti(words, field, whole, slopWhole, 1),
 2139+ Occur.SHOULD);
 2140+
19782141 bq.setBoost(boost);
1979 -
 2142+
19802143 return bq;
19812144 }
1982 -
 2145+
19832146 /** Query for section headings */
1984 - protected Query makeSectionsQuery(ParsedWords words, float boost){
1985 - return makeAnchoredQueryMulti(words,fields.sections(),new PositionalOptions.Sections(),new PositionalOptions.SectionsWhole(),0,boost);
 2147+ protected Query makeSectionsQuery(ParsedWords words, float boost) {
 2148+ return makeAnchoredQueryMulti(words, fields.sections(),
 2149+ new PositionalOptions.Sections(),
 2150+ new PositionalOptions.SectionsWhole(), 0, boost);
19862151 }
1987 -
 2152+
19882153 /** Relevance metrics based on rank (of titles and redirects) */
1989 - protected Query makeAlttitleRelevance(ParsedWords words, float boost){
1990 - return makeAnchoredQueryMulti(words,fields.alttitle(),new PositionalOptions.Alttitle(),new PositionalOptions.AlttitleWholeSloppy(),20,boost);
 2154+ protected Query makeAlttitleRelevance(ParsedWords words, float boost) {
 2155+ return makeAnchoredQueryMulti(words, fields.alttitle(),
 2156+ new PositionalOptions.Alttitle(),
 2157+ new PositionalOptions.AlttitleWholeSloppy(), 20, boost);
19912158 }
1992 -
 2159+
19932160 /** Make relevance metrics based on context via related articles */
1994 - protected Query makeRelatedRelevance(ParsedWords words, float boost){
1995 - return makeAnchoredQueryMulti(words,fields.related(),new PositionalOptions.Related(),new PositionalOptions.RelatedWhole(),0,boost);
 2161+ protected Query makeRelatedRelevance(ParsedWords words, float boost) {
 2162+ return makeAnchoredQueryMulti(words, fields.related(),
 2163+ new PositionalOptions.Related(),
 2164+ new PositionalOptions.RelatedWhole(), 0, boost);
19962165 }
1997 -
 2166+
19982167 /** Relevance metrics based on rank (of titles and redirects) */
1999 - protected Query makeAlttitleRelevance(ArrayList<String> words, float boost){
2000 - return makeAnchoredQuery(words,fields.alttitle(),new PositionalOptions.Alttitle(),new PositionalOptions.AlttitleWhole(), new PositionalOptions.AlttitleWholeSloppy(),boost,20);
 2168+ protected Query makeAlttitleRelevance(ArrayList<String> words, float boost) {
 2169+ return makeAnchoredQuery(words, fields.alttitle(),
 2170+ new PositionalOptions.Alttitle(),
 2171+ new PositionalOptions.AlttitleWhole(),
 2172+ new PositionalOptions.AlttitleWholeSloppy(), boost, 20);
20012173 }
20022174
2003 -
20042175 /** Make relevance metrics based on context via related articles */
2005 - protected Query makeRelatedRelevance(ArrayList<String> words, float boost){
2006 - return makeAnchoredQuery(words,fields.related(),new PositionalOptions.Related(),null,null,boost,0);
 2176+ protected Query makeRelatedRelevance(ArrayList<String> words, float boost) {
 2177+ return makeAnchoredQuery(words, fields.related(),
 2178+ new PositionalOptions.Related(), null, null, boost, 0);
20072179 }
20082180
2009 -
2010 - /** Additional query to match words in redirects that are not in title or article */
2011 - protected Query makeAlttitleForRedirects(ArrayList<String> words, int slop, float boost){
2012 - return makePositional(words,fields.alttitle(),new PositionalOptions.RedirectMatch(),slop,boost);
 2181+ /**
 2182+ * Additional query to match words in redirects that are not in title or
 2183+ * article
 2184+ */
 2185+ protected Query makeAlttitleForRedirects(ArrayList<String> words, int slop,
 2186+ float boost) {
 2187+ return makePositional(words, fields.alttitle(),
 2188+ new PositionalOptions.RedirectMatch(), slop, boost);
20132189 }
20142190
2015 - protected Query makeAlttitleForRedirectsMulti(ParsedWords words, int slop, float boost){
2016 - return makePositionalMulti(words,fields.alttitle(),new PositionalOptions.RedirectMatch(),slop,boost);
 2191+ protected Query makeAlttitleForRedirectsMulti(ParsedWords words, int slop,
 2192+ float boost) {
 2193+ return makePositionalMulti(words, fields.alttitle(),
 2194+ new PositionalOptions.RedirectMatch(), slop, boost);
20172195 }
2018 -
2019 - /** Make alttitle phrase for titles indexes */
2020 - public Query makeAlttitleForTitles(List<String> words){
 2196+
 2197+ /** Make alttitle phrase for titles indexes */
 2198+ public Query makeAlttitleForTitles(List<String> words) {
20212199 BooleanQuery main = new BooleanQuery(true);
20222200
2023 - PositionalQuery exact = new PositionalQuery(new PositionalOptions.AlttitleExact());
2024 - PositionalQuery sloppy = new PositionalQuery(new PositionalOptions.AlttitleSloppy());
 2201+ PositionalQuery exact = new PositionalQuery(
 2202+ new PositionalOptions.AlttitleExact());
 2203+ PositionalQuery sloppy = new PositionalQuery(
 2204+ new PositionalOptions.AlttitleSloppy());
20252205
20262206 // make exact + sloppy
20272207 int pos = 0;
2028 - for(String w : words){
2029 - Term term = new Term(fields.alttitle(),w);
 2208+ for (String w : words) {
 2209+ Term term = new Term(fields.alttitle(), w);
20302210 boolean isStop = stopWords.contains(w);
2031 - exact.add(term,isStop);
2032 - if(!isStop)
2033 - sloppy.add(term,pos,isStop); // maintain gaps
 2211+ exact.add(term, isStop);
 2212+ if (!isStop)
 2213+ sloppy.add(term, pos, isStop); // maintain gaps
20342214 pos++;
20352215 }
2036 - if(sloppy.getTerms().length == 0)
 2216+ if (sloppy.getTerms().length == 0)
20372217 return exact;
2038 -
 2218+
20392219 sloppy.setSlop(10);
2040 - main.add(exact,Occur.SHOULD);
2041 - main.add(sloppy,Occur.SHOULD);
 2220+ main.add(exact, Occur.SHOULD);
 2221+ main.add(sloppy, Occur.SHOULD);
20422222 main.setBoost(1);
20432223 return main;
2044 -
 2224+
20452225 }
2046 -
 2226+
20472227 /** Make a query to search grouped titles indexes */
2048 - public Query parseForTitles(String queryText){
 2228+ public Query parseForTitles(String queryText) {
20492229 String oldDefaultField = this.defaultField;
20502230 NamespacePolicy oldPolicy = this.namespacePolicy;
20512231 FieldBuilder.BuilderSet oldBuilder = this.builder;
20522232 this.defaultField = "alttitle";
20532233 this.namespacePolicy = NamespacePolicy.IGNORE;
2054 -
 2234+
20552235 Query q = parseRaw(queryText);
20562236
20572237 ParsedWords words = parsedWords;
2058 -
2059 - this.builder = oldBuilder;
 2238+
 2239+ this.builder = oldBuilder;
20602240 this.defaultField = oldDefaultField;
20612241 this.namespacePolicy = oldPolicy;
2062 -
 2242+
20632243 BooleanQuery full = new BooleanQuery(true);
2064 - full.add(q,Occur.MUST);
 2244+ full.add(q, Occur.MUST);
20652245
2066 - if(words.size() == 0)
 2246+ if (words.size() == 0)
20672247 return q;
2068 -
2069 - // match whole titles
2070 - Query redirectsMulti = makeAlttitleForRedirectsMulti(makeFirstAndSingular(words),20,1f);
2071 - if(redirectsMulti != null)
2072 - full.add(redirectsMulti,Occur.SHOULD);
2073 -
 2248+
 2249+ // match whole titles
 2250+ Query redirectsMulti = makeAlttitleForRedirectsMulti(
 2251+ makeFirstAndSingular(words), 20, 1f);
 2252+ if (redirectsMulti != null)
 2253+ full.add(redirectsMulti, Occur.SHOULD);
 2254+
20742255 ArticleNamespaceScaling nsScale = iid.getNamespaceScaling();
2075 - return new ArticleQueryWrap(full,new ArticleInfoImpl(),null,null,nsScale);
2076 -
 2256+ return new ArticleQueryWrap(full, new ArticleInfoImpl(), null, null,
 2257+ nsScale);
 2258+
20772259 }
2078 -
 2260+
20792261 /** check if all the words in the array are stop words */
2080 - private boolean allStopWords(ArrayList<String> words){
2081 - if(words == null || words.size() == 0)
 2262+ private boolean allStopWords(ArrayList<String> words) {
 2263+ if (words == null || words.size() == 0)
20822264 return false;
2083 - for(String w : words){
2084 - if(!stopWords.contains(w)){
 2265+ for (String w : words) {
 2266+ if (!stopWords.contains(w)) {
20852267 return false;
20862268 }
20872269 }
@@ -2091,16 +2273,17 @@
20922274 public Term[] getHighlightTerms() {
20932275 return highlightTerms;
20942276 }
2095 -
 2277+
20962278 /** @return if last parsed query had wildcards in it */
2097 - public boolean hasWildcards(){
2098 - return wildcards!=null && wildcards.hasWildcards();
 2279+ public boolean hasWildcards() {
 2280+ return wildcards != null && wildcards.hasWildcards();
20992281 }
 2282+
21002283 /** @return if last parsed query has fuzzy words in it */
2101 - public boolean hasFuzzy(){
2102 - return fuzzy!=null && fuzzy.hasFuzzy();
 2284+ public boolean hasFuzzy() {
 2285+ return fuzzy != null && fuzzy.hasFuzzy();
21032286 }
2104 -
 2287+
21052288 public void setNamespacePolicy(NamespacePolicy namespacePolicy) {
21062289 this.namespacePolicy = namespacePolicy;
21072290 }
@@ -2108,13 +2291,13 @@
21092292 public ArrayList<String> getWordsClean() {
21102293 return cleanupWords(parsedWords.extractFirst());
21112294 }
2112 -
2113 - public boolean hasPrefixFilters(){
2114 - return prefixFilters != null && prefixFilters.length>0;
 2295+
 2296+ public boolean hasPrefixFilters() {
 2297+ return prefixFilters != null && prefixFilters.length > 0;
21152298 }
2116 -
 2299+
21172300 /** Gets the raw prefix text, e.g. project:npov */
2118 - public String[] getPrefixFilters(){
 2301+ public String[] getPrefixFilters() {
21192302 return prefixFilters;
21202303 }
21212304
@@ -2123,7 +2306,4 @@
21242307 return urls;
21252308 }
21262309
2127 -
2128 -
2129 -
21302310 }

Status & tagging log