Index: trunk/lucene-search-3/src/main/java/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -21,6 +21,7 @@ |
22 | 22 | import org.apache.lucene.search.ArticleQueryWrap; |
23 | 23 | import org.apache.lucene.search.ArticleScaling; |
24 | 24 | import org.apache.lucene.search.BooleanClause; |
| 25 | +import org.apache.lucene.search.BooleanClause.Occur; |
25 | 26 | import org.apache.lucene.search.BooleanQuery; |
26 | 27 | import org.apache.lucene.search.MultiPhraseQuery; |
27 | 28 | import org.apache.lucene.search.PositionalMultiQuery; |
— | — | @@ -29,7 +30,6 @@ |
30 | 31 | import org.apache.lucene.search.Query; |
31 | 32 | import org.apache.lucene.search.RelevanceQuery; |
32 | 33 | import org.apache.lucene.search.TermQuery; |
33 | | -import org.apache.lucene.search.BooleanClause.Occur; |
34 | 34 | import org.wikimedia.lsearch.config.GlobalConfiguration; |
35 | 35 | import org.wikimedia.lsearch.config.IndexId; |
36 | 36 | import org.wikimedia.lsearch.config.IndexId.AgeScaling; |
— | — | @@ -44,13 +44,12 @@ |
45 | 45 | * Parser for wiki query syntax |
46 | 46 | * |
47 | 47 | * @author rainman |
48 | | - * |
49 | 48 | */ |
50 | 49 | public class WikiQueryParser { |
51 | 50 | private static final int MAX_TERM_LEN = 255; |
52 | | - private final char[] buffer = new char[MAX_TERM_LEN+1]; |
| 51 | + private final char[] buffer = new char[MAX_TERM_LEN + 1]; |
53 | 52 | private int length; // length of the token in the buffer |
54 | | - private Analyzer analyzer; |
| 53 | + private Analyzer analyzer; |
55 | 54 | private char[] text; // text that is being parsed |
56 | 55 | private int cur; // current position in text |
57 | 56 | private int prev_cur; // cur before parsing this token (for backToken()) |
— | — | @@ -61,76 +60,90 @@ |
62 | 61 | private String defaultField; // the default field value |
63 | 62 | private float defaultBoost = 1; |
64 | 63 | private float defaultAliasBoost = ALIAS_BOOST; |
65 | | - protected enum TokenType {WORD, FIELD, AND, OR, EOF }; |
66 | | - |
67 | | - private TokenStream tokenStream; |
| 64 | + |
| 65 | + protected enum TokenType { |
| 66 | + WORD, FIELD, AND, OR, EOF |
| 67 | + }; |
| 68 | + |
| 69 | + private TokenStream tokenStream; |
68 | 70 | private ArrayList<Token> tokens; // tokens from analysis |
69 | 71 | protected ParsedWords parsedWords; |
70 | 72 | protected String[] prefixFilters; |
71 | | - protected enum ExpandedType { WORD, WILDCARD, FUZZY, PHRASE }; |
| 73 | + |
| 74 | + protected enum ExpandedType { |
| 75 | + WORD, WILDCARD, FUZZY, PHRASE |
| 76 | + }; |
| 77 | + |
72 | 78 | protected Term[] highlightTerms = null; |
73 | | - |
| 79 | + |
74 | 80 | protected ArrayList<ArrayList<Term>> urls; |
75 | | - |
76 | | - /** sometimes the fieldsubquery takes the bool modifier, to retrieve it, use this variable, |
77 | | - * this will always point to the last unused bool modifier */ |
78 | | - BooleanClause.Occur explicitOccur = null; |
79 | | - |
| 81 | + |
| 82 | + /** |
 | 83 | +	 * Sometimes the fieldsubquery takes the bool modifier; to retrieve it, use |
 | 84 | +	 * this variable, |
 | 85 | +	 * which will always point to the last unused bool modifier. |
| 86 | + */ |
| 87 | + BooleanClause.Occur explicitOccur = null; |
| 88 | + |
80 | 89 | 	/** Whether to include aliases during title rewrite */ |
81 | 90 | protected boolean disableTitleAliases; |
82 | | - |
| 91 | + |
83 | 92 | /** boost for alias words from analyzer */ |
84 | | - public static float ALIAS_BOOST = 0.5f; |
| 93 | + public static float ALIAS_BOOST = 0.5f; |
85 | 94 | /** boost for title field */ |
86 | | - public static float TITLE_BOOST = 6; |
| 95 | + public static float TITLE_BOOST = 6; |
87 | 96 | public static float TITLE_ALIAS_BOOST = 0.2f; |
88 | 97 | public static float TITLE_PHRASE_BOOST = 2; |
89 | | - public static float STEM_TITLE_BOOST = 0.8f; |
| 98 | + public static float STEM_TITLE_BOOST = 0.8f; |
90 | 99 | public static float STEM_TITLE_ALIAS_BOOST = 0.4f; |
91 | 100 | public static float ALT_TITLE_BOOST = 4; |
92 | 101 | public static float ALT_TITLE_ALIAS_BOOST = 0.4f; |
93 | 102 | public static float CONTENTS_BOOST = 0.2f; |
94 | | - |
| 103 | + |
95 | 104 | public static float STEM_WORD_BOOST = 0.01f; |
96 | 105 | public static float SINGULAR_WORD_BOOST = 0.5f; |
97 | | - |
| 106 | + |
98 | 107 | // main phrase stuff: |
99 | 108 | public static int MAINPHRASE_SLOP = 100; |
100 | 109 | public static float MAINPHRASE_BOOST = 2f; |
101 | | - public static float RELEVANCE_RELATED_BOOST = 12f; |
| 110 | + public static float RELEVANCE_RELATED_BOOST = 12f; |
102 | 111 | public static float RELEVANCE_ALTTITLE_BOOST = 2.5f; |
103 | 112 | public static float SECTIONS_BOOST = 0.25f; |
104 | 113 | public static float ALTTITLE_BOOST = 0.5f; |
105 | 114 | public static float RELATED_BOOST = 1f; |
106 | 115 | // additional to main phrase: |
107 | 116 | public static float ADD_RELATED_BOOST = 4f; |
108 | | - |
| 117 | + |
109 | 118 | public static float WILDCARD_BOOST = 2f; |
110 | 119 | public static float FUZZY_BOOST = 4f; |
111 | | - |
| 120 | + |
112 | 121 | public static boolean ADD_STEM_TITLE = true; |
113 | 122 | public static boolean ADD_TITLE_PHRASES = true; |
114 | | - |
115 | | - /** Policies in treating field names: |
116 | | - * |
| 123 | + |
| 124 | + /** |
| 125 | + * Policies in treating field names: |
117 | 126 | * LEAVE - don't mess with field rewriting |
118 | 127 | * IGNORE - convert all field names to contents (except category) |
119 | | - * REWRITE - rewrite (help:searchterm) => (+namespace:12 contents:searchterm) |
| 128 | + * REWRITE - rewrite (help:searchterm) => (+namespace:12 |
| 129 | + * contents:searchterm) |
120 | 130 | */ |
121 | | - public enum NamespacePolicy { LEAVE, IGNORE, REWRITE }; |
| 131 | + public enum NamespacePolicy { |
| 132 | + LEAVE, IGNORE, REWRITE |
| 133 | + }; |
| 134 | + |
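
The REWRITE policy described above replaces a recognised field prefix with a namespace clause and re-parses the rest of the clause against the default field. As a rough, hand-built illustration of the query shape it produces for `help:searchterm` (using only the Lucene classes imported at the top of this file; the namespace id 12 comes from the javadoc example, and both clauses are required, matching how `fieldQuery` and `fieldsubquery` are combined in `parseClause` further down):

```java
// Illustrative sketch only: the shape of the REWRITE expansion for "help:searchterm".
BooleanQuery rewritten = new BooleanQuery();
rewritten.add(new TermQuery(new Term("namespace", "12")),        // +namespace:12
		BooleanClause.Occur.MUST);
rewritten.add(new TermQuery(new Term("contents", "searchterm")), // the re-parsed term
		BooleanClause.Occur.MUST);
```
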
122 | 135 | /** Rewritten namespace queries. prefix => query */ |
123 | | - static protected Hashtable<String,Query> namespaceQueries = null; |
| 136 | + static protected Hashtable<String, Query> namespaceQueries = null; |
124 | 137 | /** The 'all' keyword */ |
125 | 138 | static protected String namespaceAllKeyword = null; |
126 | 139 | /** Prefixes and associated filters. prefix -> filter */ |
127 | | - static protected Hashtable<String,NamespaceFilter> namespaceFilters = null; |
| 140 | + static protected Hashtable<String, NamespaceFilter> namespaceFilters = null; |
128 | 141 | 	/** nsfilter -> prefix (reverse table to namespaceFilters) */ |
129 | | - static protected Hashtable<NamespaceFilter,String> namespacePrefixes = null; |
| 142 | + static protected Hashtable<NamespaceFilter, String> namespacePrefixes = null; |
130 | 143 | private String defaultNamespaceName; |
131 | 144 | private Query namespaceRewriteQuery; |
132 | 145 | private NamespacePolicy namespacePolicy; |
133 | 146 | protected NamespaceFilter defaultNamespaceFilter; |
134 | | - protected static GlobalConfiguration global=null; |
| 147 | + protected static GlobalConfiguration global = null; |
135 | 148 | protected FieldBuilder.BuilderSet builder; |
136 | 149 | protected FieldNameFactory fields; |
137 | 150 | protected FilterFactory filters; |
— | — | @@ -140,219 +153,246 @@ |
141 | 154 | protected IndexId iid; |
142 | 155 | protected boolean isInTitle = false; |
143 | 156 | protected int isInTitleLevel = 0; |
144 | | - |
| 157 | + |
145 | 158 | /** Raw fields to append to queries like ondiscussionpage */ |
146 | | - protected HashMap<String,String> rawFields = new HashMap<String,String>(); |
147 | | - |
148 | | - Hashtable<String,String> keywordFieldMapping = new Hashtable<String,String>(); |
149 | | - |
150 | | - protected Pattern urlPattern = Pattern.compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?"); |
151 | | - |
| 159 | + protected HashMap<String, String> rawFields = new HashMap<String, String>(); |
| 160 | + |
| 161 | + Hashtable<String, String> keywordFieldMapping = new Hashtable<String, String>(); |
| 162 | + |
| 163 | + protected Pattern urlPattern = Pattern |
| 164 | + .compile("(\\w+:{0,1}\\w*@)?(\\S+)(:[0-9]+)?(\\/|\\/([\\w#!:.?+=&%@!\\-\\/]))?"); |
| 165 | + |
152 | 166 | /** default operator (must = AND, should = OR) for boolean queries */ |
153 | 167 | public BooleanClause.Occur boolDefault = BooleanClause.Occur.MUST; |
154 | | - |
| 168 | + |
155 | 169 | /** Word + boost for expanded term */ |
156 | 170 | static class WordBoost { |
157 | 171 | String word; |
158 | 172 | float boost; |
| 173 | + |
159 | 174 | public WordBoost(String word, float boost) { |
160 | 175 | this.word = word; |
161 | 176 | this.boost = boost; |
162 | 177 | } |
163 | 178 | } |
164 | | - |
| 179 | + |
165 | 180 | /** Descriptor for words within queries */ |
166 | 181 | static class WordsDesc { |
167 | 182 | /** original term text */ |
168 | | - String original = null; |
| 183 | + String original = null; |
169 | 184 | 	/** words into which the term is expanded */ |
170 | | - ArrayList<WordBoost> expanded = new ArrayList<WordBoost>(); |
| 185 | + ArrayList<WordBoost> expanded = new ArrayList<WordBoost>(); |
171 | 186 | ExpandedType type = ExpandedType.WORD; |
172 | 187 | int position; |
173 | | - |
| 188 | + |
174 | 189 | public WordsDesc(String original, ExpandedType type, int position) { |
175 | 190 | this.original = original; |
176 | 191 | this.type = type; |
177 | 192 | this.position = position; |
178 | 193 | } |
179 | 194 | |
180 | | - void add(WordBoost wb){ |
| 195 | + void add(WordBoost wb) { |
181 | 196 | expanded.add(wb); |
182 | 197 | } |
183 | | - |
184 | | - String first(){ |
| 198 | + |
| 199 | + String first() { |
185 | 200 | return expanded.get(0).word; |
186 | 201 | } |
187 | | - |
188 | | - WordBoost firstWordBoost(){ |
| 202 | + |
| 203 | + WordBoost firstWordBoost() { |
189 | 204 | return expanded.get(0); |
190 | 205 | } |
| 206 | + |
191 | 207 | /** new word desc with first word extracted only */ |
192 | | - WordsDesc firstWordsDesc(){ |
193 | | - WordsDesc d = new WordsDesc(original,type,position); |
| 208 | + WordsDesc firstWordsDesc() { |
| 209 | + WordsDesc d = new WordsDesc(original, type, position); |
194 | 210 | d.add(firstWordBoost()); |
195 | 211 | return d; |
196 | 212 | } |
197 | | - |
| 213 | + |
198 | 214 | /** create search terms */ |
199 | | - Term[] getTerms(String field){ |
| 215 | + Term[] getTerms(String field) { |
200 | 216 | Term[] terms = new Term[expanded.size()]; |
201 | | - for(int i=0;i<expanded.size();i++) |
202 | | - terms[i] = new Term(field,expanded.get(i).word); |
| 217 | + for (int i = 0; i < expanded.size(); i++) |
| 218 | + terms[i] = new Term(field, expanded.get(i).word); |
203 | 219 | return terms; |
204 | 220 | } |
205 | | - |
206 | | - ArrayList<Float> getBoosts(){ |
| 221 | + |
| 222 | + ArrayList<Float> getBoosts() { |
207 | 223 | ArrayList<Float> boosts = new ArrayList<Float>(); |
208 | | - for(WordBoost w : expanded) |
| 224 | + for (WordBoost w : expanded) |
209 | 225 | boosts.add(w.boost); |
210 | 226 | return boosts; |
211 | 227 | } |
212 | | - |
213 | | - int getPosition(){ |
| 228 | + |
| 229 | + int getPosition() { |
214 | 230 | return position; |
215 | 231 | } |
216 | | - |
217 | | - boolean isWildcardOrFuzzy(){ |
218 | | - return type == ExpandedType.WILDCARD || type == ExpandedType.FUZZY; |
| 232 | + |
| 233 | + boolean isWildcardOrFuzzy() { |
| 234 | + return type == ExpandedType.WILDCARD || type == ExpandedType.FUZZY; |
219 | 235 | } |
220 | | - |
| 236 | + |
221 | 237 | } |
222 | | - |
| 238 | + |
223 | 239 | /** Words from parser */ |
224 | 240 | static class ParsedWords { |
225 | 241 | ArrayList<WordsDesc> words = new ArrayList<WordsDesc>(); |
226 | | - |
227 | | - void add(String original, ArrayList<String> words, ArrayList<Float> boosts, ExpandedType type){ |
| 242 | + |
| 243 | + void add(String original, ArrayList<String> words, |
| 244 | + ArrayList<Float> boosts, ExpandedType type) { |
228 | 245 | int pos = this.words.size(); |
229 | | - WordsDesc wd = new WordsDesc(original,type,pos); |
230 | | - for(int i=0;i<words.size();i++){ |
231 | | - wd.add(new WordBoost(words.get(i),boosts.get(i))); |
| 246 | + WordsDesc wd = new WordsDesc(original, type, pos); |
| 247 | + for (int i = 0; i < words.size(); i++) { |
| 248 | + wd.add(new WordBoost(words.get(i), boosts.get(i))); |
232 | 249 | } |
233 | 250 | this.words.add(wd); |
234 | 251 | } |
235 | | - |
236 | | - void add(String original, ArrayList<String> words, float boost, ExpandedType type){ |
| 252 | + |
| 253 | + void add(String original, ArrayList<String> words, float boost, |
| 254 | + ExpandedType type) { |
237 | 255 | int pos = this.words.size(); |
238 | | - WordsDesc wd = new WordsDesc(original,type,pos); |
239 | | - for(int i=0;i<words.size();i++){ |
240 | | - wd.add(new WordBoost(words.get(i),boost)); |
| 256 | + WordsDesc wd = new WordsDesc(original, type, pos); |
| 257 | + for (int i = 0; i < words.size(); i++) { |
| 258 | + wd.add(new WordBoost(words.get(i), boost)); |
241 | 259 | } |
242 | 260 | this.words.add(wd); |
243 | 261 | } |
244 | | - |
245 | | - void add(String original, String word, float boost, ExpandedType type){ |
| 262 | + |
| 263 | + void add(String original, String word, float boost, ExpandedType type) { |
246 | 264 | int pos = this.words.size(); |
247 | | - WordsDesc wd = new WordsDesc(original,type,pos); |
248 | | - wd.add(new WordBoost(word,boost)); |
| 265 | + WordsDesc wd = new WordsDesc(original, type, pos); |
| 266 | + wd.add(new WordBoost(word, boost)); |
249 | 267 | this.words.add(wd); |
250 | 268 | } |
251 | | - |
252 | | - WordsDesc last(){ |
253 | | - return words.get(words.size()-1); |
| 269 | + |
| 270 | + WordsDesc last() { |
| 271 | + return words.get(words.size() - 1); |
254 | 272 | } |
255 | | - |
| 273 | + |
256 | 274 | /** Extract the main stream of words, excludes wildcards and such */ |
257 | | - ArrayList<String> extractFirst(){ |
| 275 | + ArrayList<String> extractFirst() { |
258 | 276 | ArrayList<String> ret = new ArrayList<String>(); |
259 | | - for(WordsDesc d : words){ |
260 | | - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE) |
| 277 | + for (WordsDesc d : words) { |
| 278 | + if (d.type == ExpandedType.WORD |
| 279 | + || d.type == ExpandedType.PHRASE) |
261 | 280 | ret.add(d.first()); |
262 | 281 | } |
263 | 282 | return ret; |
264 | 283 | } |
265 | | - |
| 284 | + |
266 | 285 | /** First string at index of expanded */ |
267 | | - String firstAt(int index){ |
| 286 | + String firstAt(int index) { |
268 | 287 | return words.get(index).first(); |
269 | 288 | } |
270 | | - |
271 | | - int size(){ |
| 289 | + |
| 290 | + int size() { |
272 | 291 | return words.size(); |
273 | 292 | } |
274 | | - |
| 293 | + |
275 | 294 | /** get ParsedWords with only a single word on given position */ |
276 | | - ParsedWords cloneSingleWord(int index){ |
277 | | - return cloneRange(index,index); |
| 295 | + ParsedWords cloneSingleWord(int index) { |
| 296 | + return cloneRange(index, index); |
278 | 297 | } |
| 298 | + |
279 | 299 | /** get ParsedWords with a range of words (both i1, i2 inclusive) */ |
280 | | - ParsedWords cloneRange(int i1, int i2){ |
| 300 | + ParsedWords cloneRange(int i1, int i2) { |
281 | 301 | ParsedWords ret = new ParsedWords(); |
282 | | - for(int i=i1;i<=i2;i++) |
| 302 | + for (int i = i1; i <= i2; i++) |
283 | 303 | ret.words.add(words.get(i)); |
284 | 304 | return ret; |
285 | 305 | } |
| 306 | + |
286 | 307 | /** Get ParsedWords of first words */ |
287 | | - ParsedWords cloneFirst(){ |
| 308 | + ParsedWords cloneFirst() { |
288 | 309 | ParsedWords ret = new ParsedWords(); |
289 | | - for(WordsDesc d : words){ |
290 | | - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE) |
| 310 | + for (WordsDesc d : words) { |
| 311 | + if (d.type == ExpandedType.WORD |
| 312 | + || d.type == ExpandedType.PHRASE) |
291 | 313 | ret.add(d.firstWordsDesc()); |
292 | 314 | } |
293 | 315 | return ret; |
294 | 316 | } |
295 | | - |
296 | | - /** Get ParsedWords of first words, or whole ParsedWords if wildcard/fuzzy */ |
297 | | - ParsedWords cloneFirstWithWildcards(){ |
| 317 | + |
| 318 | + /** |
| 319 | + * Get ParsedWords of first words, or whole ParsedWords if |
| 320 | + * wildcard/fuzzy |
| 321 | + */ |
| 322 | + ParsedWords cloneFirstWithWildcards() { |
298 | 323 | ParsedWords ret = new ParsedWords(); |
299 | | - for(WordsDesc d : words){ |
300 | | - if(d.type==ExpandedType.WORD || d.type==ExpandedType.PHRASE) |
| 324 | + for (WordsDesc d : words) { |
| 325 | + if (d.type == ExpandedType.WORD |
| 326 | + || d.type == ExpandedType.PHRASE) |
301 | 327 | ret.add(d.firstWordsDesc()); |
302 | | - else if(d.isWildcardOrFuzzy()) |
| 328 | + else if (d.isWildcardOrFuzzy()) |
303 | 329 | ret.add(d); |
304 | 330 | } |
305 | 331 | return ret; |
306 | 332 | } |
307 | | - |
308 | | - void add(WordsDesc desc){ |
| 333 | + |
| 334 | + void add(WordsDesc desc) { |
309 | 335 | words.add(desc); |
310 | 336 | } |
311 | | - |
| 337 | + |
312 | 338 | } |
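
`WordBoost`, `WordsDesc` and `ParsedWords` above simply record, for each original token, the words it expands to (stems, wildcard or fuzzy variants) and a boost per word. A minimal usage sketch, assuming package access to these nested classes and the usual `java.util` imports; the example words and wildcard expansions are invented:

```java
ParsedWords words = new ParsedWords();

// a plain word plus a stemmed alias with a reduced boost
words.add("parsing", "parsing", 1f, ExpandedType.WORD);
words.last().add(new WordBoost("parse", STEM_WORD_BOOST));

// a wildcard term expanded to a few concrete words (hypothetical expansions)
ArrayList<String> expansions = new ArrayList<String>();
expansions.add("query");
expansions.add("queries");
words.add("quer*", expansions, WILDCARD_BOOST, ExpandedType.WILDCARD);

// extractFirst() keeps only the main stream (WORD/PHRASE), skipping wildcards
ArrayList<String> mainStream = words.extractFirst();    // ["parsing"]
// getTerms() turns one expansion into search terms for a concrete field
Term[] titleTerms = words.last().getTerms("title");     // title:query, title:queries
```
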
313 | | - |
| 339 | + |
314 | 340 | /** Init namespace queries */ |
315 | | - protected void initNamespaces(){ |
316 | | - if(namespaceQueries != null) |
| 341 | + protected void initNamespaces() { |
| 342 | + if (namespaceQueries != null) |
317 | 343 | return; |
318 | | - if(global == null) |
319 | | - global = GlobalConfiguration.getInstance(); |
| 344 | + if (global == null) |
| 345 | + global = GlobalConfiguration.getInstance(); |
320 | 346 | namespaceAllKeyword = global.getNamespacePrefixAll(); |
321 | | - namespaceQueries = new Hashtable<String,Query>(); |
322 | | - namespacePrefixes = new Hashtable<NamespaceFilter,String>(); |
| 347 | + namespaceQueries = new Hashtable<String, Query>(); |
| 348 | + namespacePrefixes = new Hashtable<NamespaceFilter, String>(); |
323 | 349 | namespaceFilters = global.getNamespacePrefixes(); |
324 | | - for(Entry<String,NamespaceFilter> prefix : namespaceFilters.entrySet()){ |
325 | | - namespaceQueries.put(prefix.getKey(),generateRewrite(prefix.getValue())); |
326 | | - namespacePrefixes.put(prefix.getValue(),prefix.getKey()); |
| 350 | + for (Entry<String, NamespaceFilter> prefix : namespaceFilters |
| 351 | + .entrySet()) { |
| 352 | + namespaceQueries.put(prefix.getKey(), |
| 353 | + generateRewrite(prefix.getValue())); |
| 354 | + namespacePrefixes.put(prefix.getValue(), prefix.getKey()); |
327 | 355 | } |
328 | 356 | } |
329 | | - |
| 357 | + |
330 | 358 | /** |
331 | 359 | * Construct using default policy (LEAVE), without any namespace rewriting |
332 | | - * @param field default field name |
| 360 | + * |
| 361 | + * @param field |
| 362 | + * default field name |
333 | 363 | * @param analyzer |
334 | 364 | */ |
335 | | - public WikiQueryParser(String field, Analyzer analyzer, FieldBuilder.BuilderSet builder, Collection<String> stopWords){ |
336 | | - this(field,(NamespaceFilter)null,analyzer,builder,NamespacePolicy.LEAVE,stopWords); |
| 365 | + public WikiQueryParser(String field, Analyzer analyzer, |
| 366 | + FieldBuilder.BuilderSet builder, Collection<String> stopWords) { |
| 367 | + this(field, (NamespaceFilter) null, analyzer, builder, |
| 368 | + NamespacePolicy.LEAVE, stopWords); |
337 | 369 | } |
338 | | - |
| 370 | + |
339 | 371 | /** |
340 | 372 | * Construct with default field (e.g. contents), with default namespace |
341 | 373 | * (e.g. main), and with analyzer and namespace policy |
| 374 | + * |
342 | 375 | * @param field |
343 | 376 | * @param namespace |
344 | 377 | * @param analyzer |
345 | 378 | * @param nsPolicy |
346 | 379 | */ |
347 | | - public WikiQueryParser(String field, String namespace, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy, Collection<String> stopWords){ |
348 | | - this(field,new NamespaceFilter(namespace),analyzer,builder,nsPolicy,stopWords); |
| 380 | + public WikiQueryParser(String field, String namespace, Analyzer analyzer, |
| 381 | + FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy, |
| 382 | + Collection<String> stopWords) { |
| 383 | + this(field, new NamespaceFilter(namespace), analyzer, builder, |
| 384 | + nsPolicy, stopWords); |
349 | 385 | } |
350 | | - |
351 | | - public WikiQueryParser(String field, String namespace, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy){ |
352 | | - this(field,new NamespaceFilter(namespace),analyzer,builder,nsPolicy,null); |
| 386 | + |
| 387 | + public WikiQueryParser(String field, String namespace, Analyzer analyzer, |
| 388 | + FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy) { |
| 389 | + this(field, new NamespaceFilter(namespace), analyzer, builder, |
| 390 | + nsPolicy, null); |
353 | 391 | } |
354 | | - |
355 | | - public WikiQueryParser(String field, NamespaceFilter nsfilter, Analyzer analyzer, FieldBuilder.BuilderSet builder, NamespacePolicy nsPolicy, Collection<String> stopWords){ |
356 | | - defaultField = field; |
| 392 | + |
| 393 | + public WikiQueryParser(String field, NamespaceFilter nsfilter, |
| 394 | + Analyzer analyzer, FieldBuilder.BuilderSet builder, |
| 395 | + NamespacePolicy nsPolicy, Collection<String> stopWords) { |
| 396 | + defaultField = field; |
357 | 397 | this.analyzer = analyzer; |
358 | 398 | this.builder = builder; |
359 | 399 | this.fields = builder.getFields(); |
— | — | @@ -361,299 +401,313 @@ |
362 | 402 | tokens = new ArrayList<Token>(); |
363 | 403 | this.namespacePolicy = nsPolicy; |
364 | 404 | disableTitleAliases = true; |
365 | | - keywordFieldMapping = new Hashtable<String,String>(); |
366 | | - keywordFieldMapping.put("inthread", "ThreadAncestor"); |
| 405 | + keywordFieldMapping = new Hashtable<String, String>(); |
| 406 | + keywordFieldMapping.put("inthread", "ThreadAncestor"); |
367 | 407 | keywordFieldMapping.put("ondiscussionpage", "ThreadPage"); |
368 | 408 | initNamespaces(); |
369 | 409 | this.stopWords = new HashSet<String>(); |
370 | | - if(stopWords != null) |
| 410 | + if (stopWords != null) |
371 | 411 | this.stopWords.addAll(stopWords); |
372 | | - this.defaultNamespaceFilter=nsfilter; |
373 | | - if(nsfilter != null){ |
374 | | - namespaceRewriteQuery = generateRewrite(nsfilter); |
375 | | - if(namespaceRewriteQuery != null && namespacePrefixes.containsKey(nsfilter)) |
| 412 | + this.defaultNamespaceFilter = nsfilter; |
| 413 | + if (nsfilter != null) { |
| 414 | + namespaceRewriteQuery = generateRewrite(nsfilter); |
| 415 | + if (namespaceRewriteQuery != null |
| 416 | + && namespacePrefixes.containsKey(nsfilter)) |
376 | 417 | defaultNamespaceName = namespacePrefixes.get(nsfilter); |
377 | 418 | else |
378 | 419 | defaultNamespaceName = null; |
379 | | - } |
380 | | - else{ |
| 420 | + } else { |
381 | 421 | namespaceRewriteQuery = null; |
382 | 422 | defaultNamespaceName = null; |
383 | 423 | } |
384 | 424 | } |
385 | | - |
| 425 | + |
386 | 426 | /** Generate a rewrite query for a collection of namespaces */ |
387 | | - public static Query generateRewrite(NamespaceFilter nsfilter){ |
388 | | - if(nsfilter.cardinality() == 0) |
| 427 | + public static Query generateRewrite(NamespaceFilter nsfilter) { |
| 428 | + if (nsfilter.cardinality() == 0) |
389 | 429 | return null; |
390 | | - else if(nsfilter.cardinality() == 1) |
391 | | - return new TermQuery(new Term("namespace",Integer.toString(nsfilter.getNamespace()))); |
392 | | - |
| 430 | + else if (nsfilter.cardinality() == 1) |
| 431 | + return new TermQuery(new Term("namespace", |
| 432 | + Integer.toString(nsfilter.getNamespace()))); |
| 433 | + |
393 | 434 | BooleanQuery bq = new BooleanQuery(); |
394 | 435 | BitSet bs = nsfilter.getIncluded(); |
395 | 436 | // iterate over set bits |
396 | | - for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)){ |
397 | | - bq.add(new TermQuery(new Term("namespace",Integer.toString(i))), |
| 437 | + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { |
| 438 | + bq.add(new TermQuery(new Term("namespace", Integer.toString(i))), |
398 | 439 | BooleanClause.Occur.SHOULD); |
399 | | - bq.add(new TermQuery(new Term("redirect_namespace",Integer.toString(i))), |
400 | | - BooleanClause.Occur.MUST_NOT); |
| 440 | + bq.add(new TermQuery(new Term("redirect_namespace", Integer |
| 441 | + .toString(i))), BooleanClause.Occur.MUST_NOT); |
401 | 442 | } |
402 | 443 | return bq; |
403 | 444 | } |
404 | | - |
| 445 | + |
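
As a usage sketch of `generateRewrite()` above: a single-namespace filter collapses to one `namespace:<id>` term query, while a multi-namespace filter becomes a `BooleanQuery` of SHOULD clauses over `namespace`, with the matching `redirect_namespace` terms excluded. The `NamespaceFilter` constructor forms are the ones used elsewhere in this file; the concrete namespace ids are examples only:

```java
// one namespace: a plain TermQuery  namespace:0
Query mainOnly = WikiQueryParser.generateRewrite(new NamespaceFilter("0"));

// several namespaces: SHOULD namespace:0 / namespace:12 clauses,
// each with its redirect_namespace counterpart marked MUST_NOT
Query mainAndHelp = WikiQueryParser.generateRewrite(new NamespaceFilter("0,12"));
```
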
405 | 446 | /** Generate a rewrite query for a collection of namespaces */ |
406 | | - public static Query generateRedirectRewrite(NamespaceFilter nsfilter){ |
407 | | - if(nsfilter.cardinality() == 0) |
| 447 | + public static Query generateRedirectRewrite(NamespaceFilter nsfilter) { |
| 448 | + if (nsfilter.cardinality() == 0) |
408 | 449 | return null; |
409 | | - else if(nsfilter.cardinality() == 1) |
410 | | - return new TermQuery(new Term("redirect_namespace",Integer.toString(nsfilter.getNamespace()))); |
411 | | - |
| 450 | + else if (nsfilter.cardinality() == 1) |
| 451 | + return new TermQuery(new Term("redirect_namespace", |
| 452 | + Integer.toString(nsfilter.getNamespace()))); |
| 453 | + |
412 | 454 | BooleanQuery bq = new BooleanQuery(); |
413 | 455 | BitSet bs = nsfilter.getIncluded(); |
414 | 456 | // iterate over set bits |
415 | | - for(int i=bs.nextSetBit(0); i>=0; i=bs.nextSetBit(i+1)){ |
416 | | - bq.add(new TermQuery(new Term("redirect_namespace",Integer.toString(i))), |
417 | | - BooleanClause.Occur.SHOULD); |
| 457 | + for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) { |
| 458 | + bq.add(new TermQuery(new Term("redirect_namespace", Integer |
| 459 | + .toString(i))), BooleanClause.Occur.SHOULD); |
418 | 460 | } |
419 | 461 | return bq; |
420 | 462 | } |
421 | | - |
422 | | - /** |
| 463 | + |
| 464 | + /** |
423 | 465 | * Get a hashset of namespace numbers for fields that are |
424 | | - * valid namespace keys. |
| 466 | + * valid namespace keys. |
| 467 | + * |
425 | 468 | * @param queryText |
426 | 469 | * @return |
427 | 470 | */ |
428 | | - public HashSet<NamespaceFilter> getFieldNamespaces(String queryText){ |
| 471 | + public HashSet<NamespaceFilter> getFieldNamespaces(String queryText) { |
429 | 472 | HashSet<String> fields = getFields(queryText); |
430 | 473 | HashSet<NamespaceFilter> ret = new HashSet<NamespaceFilter>(); |
431 | 474 | List ThreadingKeywords = new ArrayList(); |
432 | | - ThreadingKeywords.add("inthread"); |
433 | | - |
434 | | - for(String field : fields){ |
| 475 | + ThreadingKeywords.add("inthread"); |
| 476 | + |
| 477 | + for (String field : fields) { |
435 | 478 | field = field.toLowerCase(); |
436 | | - if(namespaceFilters.containsKey(field)) |
| 479 | + if (namespaceFilters.containsKey(field)) |
437 | 480 | ret.add(namespaceFilters.get(field)); |
438 | | - else if(field.equals(namespaceAllKeyword)) |
| 481 | + else if (field.equals(namespaceAllKeyword)) |
439 | 482 | ret.add(new NamespaceFilter()); |
440 | | - else if(field.equals(defaultField) && defaultNamespaceFilter != null) |
| 483 | + else if (field.equals(defaultField) |
| 484 | + && defaultNamespaceFilter != null) |
441 | 485 | ret.add(defaultNamespaceFilter); |
442 | | - else if(field.startsWith("[")){ |
443 | | - ret.add(new NamespaceFilter(field.substring(1,field.length()-1))); |
| 486 | + else if (field.startsWith("[")) { |
| 487 | + ret.add(new NamespaceFilter(field.substring(1, |
| 488 | + field.length() - 1))); |
444 | 489 | } else if (ThreadingKeywords.contains(field)) { |
445 | | - ret.add( new NamespaceFilter(90) ); |
| 490 | + ret.add(new NamespaceFilter(90)); |
446 | 491 | } |
447 | 492 | } |
448 | | - |
| 493 | + |
449 | 494 | return ret; |
450 | 495 | } |
451 | | - |
| 496 | + |
452 | 497 | /** get all fields that appear in a query */ |
453 | | - public HashSet<String> getFields(String queryText){ |
| 498 | + public HashSet<String> getFields(String queryText) { |
454 | 499 | int level = 0; // parenthesis count |
455 | 500 | HashSet<String> fields = new HashSet<String>(); |
456 | 501 | int fieldLevel = -1; |
457 | 502 | TokenType tokenType; |
458 | 503 | boolean inPhrase = false; |
459 | | - |
| 504 | + |
460 | 505 | reset(); |
461 | | - |
462 | | - queryLength = queryText.length(); |
| 506 | + |
| 507 | + queryLength = queryText.length(); |
463 | 508 | text = queryText.toCharArray(); |
464 | | - |
465 | | - for(cur = 0; cur < text.length; cur++ ){ |
| 509 | + |
| 510 | + for (cur = 0; cur < text.length; cur++) { |
466 | 511 | c = text[cur]; |
467 | | - if(c == '"'){ |
| 512 | + if (c == '"') { |
468 | 513 | inPhrase = !inPhrase; |
469 | | - if(inPhrase && fieldLevel == -1) |
| 514 | + if (inPhrase && fieldLevel == -1) |
470 | 515 | fields.add(defaultField); |
471 | 516 | } |
472 | | - |
473 | | - if(inPhrase) |
| 517 | + |
| 518 | + if (inPhrase) |
474 | 519 | continue; // ignore stuff between "" |
475 | | - |
476 | | - if(c == ')'){ |
| 520 | + |
| 521 | + if (c == ')') { |
477 | 522 | level--; |
478 | | - if(level < fieldLevel) |
| 523 | + if (level < fieldLevel) |
479 | 524 | fieldLevel = -1; |
480 | 525 | continue; |
481 | | - } else if(c == '('){ |
482 | | - level++; |
| 526 | + } else if (c == '(') { |
| 527 | + level++; |
483 | 528 | continue; |
484 | | - } else if(fieldLevel != -1 && level>fieldLevel) |
| 529 | + } else if (fieldLevel != -1 && level > fieldLevel) |
485 | 530 | continue; |
486 | | - |
487 | | - if(Character.isLetterOrDigit(c)){ |
| 531 | + |
| 532 | + if (Character.isLetterOrDigit(c)) { |
488 | 533 | tokenType = fetchToken(); |
489 | | - if(tokenType == TokenType.FIELD){ |
| 534 | + if (tokenType == TokenType.FIELD) { |
490 | 535 | fieldLevel = level; |
491 | | - fields.add(new String(buffer,0,length)); |
492 | | - } else if(tokenType == TokenType.WORD){ |
493 | | - if(fieldLevel == -1) |
| 536 | + fields.add(new String(buffer, 0, length)); |
| 537 | + } else if (tokenType == TokenType.WORD) { |
| 538 | + if (fieldLevel == -1) |
494 | 539 | fields.add(defaultField); |
495 | 540 | } |
496 | | - } else if(c == '['){ |
497 | | - if(fetchGenericPrefix()){ |
| 541 | + } else if (c == '[') { |
| 542 | + if (fetchGenericPrefix()) { |
498 | 543 | fieldLevel = level; |
499 | | - fields.add(new String(buffer,0,length)); |
| 544 | + fields.add(new String(buffer, 0, length)); |
500 | 545 | } |
501 | 546 | } |
502 | 547 | } |
503 | | - |
504 | | - |
| 548 | + |
505 | 549 | return fields; |
506 | 550 | } |
507 | | - |
| 551 | + |
508 | 552 | /** Find and delete all valid prefixes, return search terms in tokens */ |
509 | | - public ArrayList<Token> tokenizeForSpellCheck(String queryText){ |
| 553 | + public ArrayList<Token> tokenizeForSpellCheck(String queryText) { |
510 | 554 | int level = 0; // parenthesis count |
511 | 555 | int fieldLevel = -1; |
512 | 556 | TokenType tokenType; |
513 | 557 | boolean inPhrase = false; |
514 | | - |
| 558 | + |
515 | 559 | Analyzer oldAnalyzer = this.analyzer; |
516 | | - this.analyzer = Analyzers.getReusableAnalyzer(filters,new TokenizerOptions.SpellCheckSearch()); |
517 | | - |
| 560 | + this.analyzer = Analyzers.getReusableAnalyzer(filters, |
| 561 | + new TokenizerOptions.SpellCheckSearch()); |
| 562 | + |
518 | 563 | ArrayList<Token> ret = new ArrayList<Token>(); |
519 | | - |
| 564 | + |
520 | 565 | reset(); |
521 | | - |
522 | | - queryLength = queryText.length(); |
| 566 | + |
| 567 | + queryLength = queryText.length(); |
523 | 568 | text = queryText.toCharArray(); |
524 | 569 | String oldDefault = defaultField; |
525 | 570 | defaultField = "title"; // no stemming |
526 | | - |
527 | | - for(cur = 0; cur < text.length; cur++ ){ |
| 571 | + |
| 572 | + for (cur = 0; cur < text.length; cur++) { |
528 | 573 | c = text[cur]; |
529 | | - if(c == '"'){ |
| 574 | + if (c == '"') { |
530 | 575 | inPhrase = !inPhrase; |
531 | 576 | } |
532 | | - |
533 | | - if(inPhrase) // skip words in phrases |
534 | | - continue; |
535 | | - else if(c == ')'){ |
| 577 | + |
| 578 | + if (inPhrase) // skip words in phrases |
| 579 | + continue; |
| 580 | + else if (c == ')') { |
536 | 581 | level--; |
537 | | - if(level < fieldLevel) |
| 582 | + if (level < fieldLevel) |
538 | 583 | fieldLevel = -1; |
539 | 584 | continue; |
540 | | - } else if(c == '('){ |
541 | | - level++; |
| 585 | + } else if (c == '(') { |
| 586 | + level++; |
542 | 587 | continue; |
543 | | - } else if(fieldLevel != -1 && level>fieldLevel) |
| 588 | + } else if (fieldLevel != -1 && level > fieldLevel) |
544 | 589 | continue; |
545 | | - |
| 590 | + |
546 | 591 | // include exclusion/inclusion marks |
547 | | - if(isTermChar(c) && text[cur]!='-' && text[cur]!='+'){ |
| 592 | + if (isTermChar(c) && text[cur] != '-' && text[cur] != '+') { |
548 | 593 | int start = cur; |
549 | 594 | tokenType = fetchToken(inPhrase); |
550 | 595 | // ignore excluded words |
551 | | - if(tokenType == TokenType.WORD && (start==0 || text[start-1]!='-')){ |
| 596 | + if (tokenType == TokenType.WORD |
| 597 | + && (start == 0 || text[start - 1] != '-')) { |
552 | 598 | String type = "word"; |
553 | | - if(bufferIsWildCard()) |
| 599 | + if (bufferIsWildCard()) |
554 | 600 | type = "wildcard"; |
555 | | - else if(bufferIsFuzzy()) |
| 601 | + else if (bufferIsFuzzy()) |
556 | 602 | type = "fuzzy"; |
557 | 603 | analyzeBuffer(); |
558 | | - for(Token t : tokens){ |
559 | | - if(t.getPositionIncrement() > 0){ |
560 | | - ret.add(new Token(t.termText(),start+t.startOffset(),start+t.endOffset(),type)); |
| 604 | + for (Token t : tokens) { |
| 605 | + if (t.getPositionIncrement() > 0) { |
| 606 | + ret.add(new Token(t.termText(), start |
| 607 | + + t.startOffset(), start + t.endOffset(), |
| 608 | + type)); |
561 | 609 | } |
562 | | - } |
| 610 | + } |
563 | 611 | } |
564 | | - } else if(c == '[' && !inPhrase){ |
| 612 | + } else if (c == '[' && !inPhrase) { |
565 | 613 | fetchGenericPrefix(); |
566 | 614 | } |
567 | 615 | } |
568 | | - |
| 616 | + |
569 | 617 | this.analyzer = oldAnalyzer; |
570 | 618 | defaultField = oldDefault; |
571 | | - |
| 619 | + |
572 | 620 | return ret; |
573 | | - |
| 621 | + |
574 | 622 | } |
575 | | - |
| 623 | + |
576 | 624 | /** rewrite field name (e.g. help) into a term query like namespace:12 */ |
577 | | - private Query getNamespaceQuery(String fieldName){ |
578 | | - if(fieldName == null || namespacePolicy != NamespacePolicy.REWRITE) |
| 625 | + private Query getNamespaceQuery(String fieldName) { |
| 626 | + if (fieldName == null || namespacePolicy != NamespacePolicy.REWRITE) |
579 | 627 | return null; |
580 | | - |
| 628 | + |
581 | 629 | Query q; |
582 | | - if((q = namespaceQueries.get(fieldName))!=null){ |
| 630 | + if ((q = namespaceQueries.get(fieldName)) != null) { |
583 | 631 | return q; |
584 | | - } else if(fieldName.startsWith("[")){ |
585 | | - return generateRewrite(new NamespaceFilter(fieldName.substring(1,fieldName.length()-1))); |
| 632 | + } else if (fieldName.startsWith("[")) { |
| 633 | + return generateRewrite(new NamespaceFilter(fieldName.substring(1, |
| 634 | + fieldName.length() - 1))); |
586 | 635 | } else |
587 | 636 | return null; |
588 | 637 | } |
589 | | - |
590 | | - private NamespaceFilter getNamespaceFilter(String fieldName){ |
591 | | - if(fieldName == null) |
| 638 | + |
| 639 | + private NamespaceFilter getNamespaceFilter(String fieldName) { |
| 640 | + if (fieldName == null) |
592 | 641 | return defaultNamespaceFilter; |
593 | | - else if(namespaceFilters.contains(fieldName)) |
| 642 | + else if (namespaceFilters.contains(fieldName)) |
594 | 643 | return namespaceFilters.get(fieldName); |
595 | | - else if(fieldName.startsWith("[")) |
596 | | - return new NamespaceFilter(fieldName.substring(1,fieldName.length()-1)); |
| 644 | + else if (fieldName.startsWith("[")) |
| 645 | + return new NamespaceFilter(fieldName.substring(1, |
| 646 | + fieldName.length() - 1)); |
597 | 647 | else |
598 | 648 | return defaultNamespaceFilter; |
599 | 649 | } |
600 | | - |
601 | | - private final boolean isTermChar(char ch){ |
602 | | - return !Character.isWhitespace(ch) && ch != ':' && ch != '(' && ch != ')' && ch !='[' && ch != ']' && ch != ',' && ch != ';' && ch != '"'; |
| 650 | + |
| 651 | + private final boolean isTermChar(char ch) { |
| 652 | + return !Character.isWhitespace(ch) && ch != ':' && ch != '(' |
| 653 | + && ch != ')' && ch != '[' && ch != ']' && ch != ',' |
| 654 | + && ch != ';' && ch != '"'; |
603 | 655 | } |
604 | | - |
| 656 | + |
605 | 657 | /** |
606 | | - * Fetch token into <code>buffer</code> starting from current position (<code>cur</code>) |
| 658 | + * Fetch token into <code>buffer</code> starting from current position ( |
| 659 | + * <code>cur</code>) |
607 | 660 | * |
608 | 661 | * @return type of the token in buffer |
609 | 662 | */ |
610 | | - private TokenType fetchToken(){ |
| 663 | + private TokenType fetchToken() { |
611 | 664 | return fetchToken(false); |
612 | 665 | } |
613 | | - private TokenType fetchToken(boolean termOnly){ |
| 666 | + |
| 667 | + private TokenType fetchToken(boolean termOnly) { |
614 | 668 | char ch; |
615 | 669 | prev_cur = cur; |
616 | | - for(length = 0; cur < queryLength; cur++){ |
| 670 | + for (length = 0; cur < queryLength; cur++) { |
617 | 671 | ch = text[cur]; |
618 | | - if(length == 0 && ch == ' ') |
| 672 | + if (length == 0 && ch == ' ') |
619 | 673 | continue; // ignore whitespaces |
620 | | - |
621 | | - // pluses and minuses, underscores can be within words (to prevent to be missinterpeted), *,? are for wildcard queries |
622 | | - if(isTermChar(ch)){ |
623 | | - if(length<buffer.length) |
| 674 | + |
 | 675 | +	// pluses, minuses and underscores can appear within words (to prevent |
 | 676 | +	// them from being misinterpreted); *,? are for wildcard queries |
| 677 | + if (isTermChar(ch)) { |
| 678 | + if (length < buffer.length) |
624 | 679 | buffer[length++] = ch; |
625 | | - } else{ |
| 680 | + } else { |
626 | 681 | cur--; // position before the nonletter character |
627 | 682 | break; |
628 | 683 | } |
629 | 684 | } |
630 | | - if(length == 0) |
| 685 | + if (length == 0) |
631 | 686 | return TokenType.EOF; |
632 | | - |
633 | | - if(termOnly) |
634 | | - return TokenType.WORD; |
635 | | - |
| 687 | + |
| 688 | + if (termOnly) |
| 689 | + return TokenType.WORD; |
| 690 | + |
636 | 691 | // check for keywords |
637 | | - if(length == 3 && buffer[0]=='A' && buffer[1]=='N' && buffer[2]=='D') |
| 692 | + if (length == 3 && buffer[0] == 'A' && buffer[1] == 'N' |
| 693 | + && buffer[2] == 'D') |
638 | 694 | return TokenType.AND; |
639 | | - else if(length == 2 && buffer[0]=='O' && buffer[1]=='R') |
| 695 | + else if (length == 2 && buffer[0] == 'O' && buffer[1] == 'R') |
640 | 696 | return TokenType.OR; |
641 | | - |
642 | | - |
| 697 | + |
643 | 698 | // lookahead to see if this is a field |
644 | | - for(lookup = cur+1; lookup < queryLength; lookup++ ){ |
| 699 | + for (lookup = cur + 1; lookup < queryLength; lookup++) { |
645 | 700 | ch = text[lookup]; |
646 | | - if(ch == ' ') |
| 701 | + if (ch == ' ') |
647 | 702 | continue; |
648 | | - else if(ch == ':'){ |
| 703 | + else if (ch == ':') { |
649 | 704 | // check if it's a valid field |
650 | | - String f = new String(buffer,0,length); |
651 | | - |
| 705 | + String f = new String(buffer, 0, length); |
| 706 | + |
652 | 707 | List<String> fieldOperators = getFieldOperators(); |
653 | | - |
654 | | - if( f.equals(namespaceAllKeyword) |
655 | | - || fieldOperators.contains(f) |
| 708 | + |
| 709 | + if (f.equals(namespaceAllKeyword) || fieldOperators.contains(f) |
656 | 710 | || namespaceFilters.containsKey(f) |
657 | | - || namespacePolicy == NamespacePolicy.LEAVE){ |
| 711 | + || namespacePolicy == NamespacePolicy.LEAVE) { |
658 | 712 | cur = lookup; |
659 | 713 | return TokenType.FIELD; |
660 | 714 | } else |
— | — | @@ -661,35 +715,35 @@ |
662 | 716 | } else |
663 | 717 | break; |
664 | 718 | } |
665 | | - |
666 | | - return TokenType.WORD; |
| 719 | + |
| 720 | + return TokenType.WORD; |
667 | 721 | } |
668 | | - |
| 722 | + |
669 | 723 | private List<String> getFieldOperators() { |
670 | 724 | List<String> fieldOperators = new ArrayList<String>(); |
671 | 725 | fieldOperators.add("intitle"); |
672 | 726 | fieldOperators.add("incategory"); |
673 | | - fieldOperators.add("inthread"); |
674 | | - |
| 727 | + fieldOperators.add("inthread"); |
| 728 | + |
675 | 729 | return fieldOperators; |
676 | 730 | } |
677 | | - |
| 731 | + |
678 | 732 | /** |
679 | | - * Fetches prefixes like [0,1,2] (in [0,1,2]:query) |
| 733 | + * Fetches prefixes like [0,1,2] (in [0,1,2]:query) |
680 | 734 | * |
681 | 735 | * @return true if search prefixes is successfully fetched |
682 | 736 | */ |
683 | | - private boolean fetchGenericPrefix(){ |
| 737 | + private boolean fetchGenericPrefix() { |
684 | 738 | char ch; |
685 | 739 | prev_cur = cur; |
686 | | - if(text[cur] != '[') |
| 740 | + if (text[cur] != '[') |
687 | 741 | return false; // sanity check |
688 | 742 | buffer[0] = '['; |
689 | | - for(length = 1, cur++; cur < queryLength; cur++){ |
| 743 | + for (length = 1, cur++; cur < queryLength; cur++) { |
690 | 744 | ch = text[cur]; |
691 | | - if(Character.isDigit(ch) || ch ==',') |
| 745 | + if (Character.isDigit(ch) || ch == ',') |
692 | 746 | buffer[length++] = ch; |
693 | | - else if(ch == ']' && cur+1 < queryLength && text[cur+1]==':'){ |
| 747 | + else if (ch == ']' && cur + 1 < queryLength && text[cur + 1] == ':') { |
694 | 748 | cur++; // position on : |
695 | 749 | buffer[length++] = ch; |
696 | 750 | return true; |
— | — | @@ -698,323 +752,359 @@ |
699 | 753 | } |
700 | 754 | cur = prev_cur; // traceback |
701 | 755 | return false; |
702 | | - |
| 756 | + |
703 | 757 | } |
704 | | - |
| 758 | + |
705 | 759 | /** Go back one token */ |
706 | | - private void backToken(){ |
| 760 | + private void backToken() { |
707 | 761 | cur = prev_cur; |
708 | 762 | } |
709 | 763 | |
710 | 764 | 	/** analyze buffer into tokens using the default analyzer */ |
711 | | - private void analyzeBuffer(){ |
| 765 | + private void analyzeBuffer() { |
712 | 766 | String analysisField = defaultField; |
713 | | - if(defaultField.equals("contents") && isInTitle) |
| 767 | + if (defaultField.equals("contents") && isInTitle) |
714 | 768 | analysisField = "title"; |
715 | | - tokenStream = analyzer.tokenStream(analysisField, |
716 | | - new String(buffer,0,length)); |
717 | | - |
| 769 | + tokenStream = analyzer.tokenStream(analysisField, new String(buffer, 0, |
| 770 | + length)); |
| 771 | + |
718 | 772 | Token token; |
719 | 773 | tokens.clear(); |
720 | | - try{ |
721 | | - while((token = tokenStream.next()) != null){ |
| 774 | + try { |
| 775 | + while ((token = tokenStream.next()) != null) { |
722 | 776 | tokens.add(token); |
723 | 777 | } |
724 | | - } catch (IOException e){ |
| 778 | + } catch (IOException e) { |
725 | 779 | e.printStackTrace(); |
726 | | - } |
| 780 | + } |
727 | 781 | } |
728 | | - |
729 | | - /** Analyze a string, and return tokens (doesn't use any of the object storage attributes) */ |
730 | | - private ArrayList<Token> analyzeString(String input){ |
| 782 | + |
| 783 | + /** |
| 784 | + * Analyze a string, and return tokens (doesn't use any of the object |
| 785 | + * storage attributes) |
| 786 | + */ |
| 787 | + private ArrayList<Token> analyzeString(String input) { |
731 | 788 | tokenStream = analyzer.tokenStream("contents", input); |
732 | | - |
| 789 | + |
733 | 790 | ArrayList<Token> ret = new ArrayList<Token>(); |
734 | 791 | Token token; |
735 | | - try{ |
736 | | - while((token = tokenStream.next()) != null){ |
| 792 | + try { |
| 793 | + while ((token = tokenStream.next()) != null) { |
737 | 794 | ret.add(token); |
738 | 795 | } |
739 | | - } catch (IOException e){ |
| 796 | + } catch (IOException e) { |
740 | 797 | e.printStackTrace(); |
741 | 798 | } |
742 | 799 | return ret; |
743 | 800 | } |
744 | | - |
745 | | - |
| 801 | + |
746 | 802 | 	/** Make term from Lucene token */ |
747 | | - private Term makeTerm(Token token){ |
| 803 | + private Term makeTerm(Token token) { |
748 | 804 | return makeTerm(token.termText()); |
749 | 805 | } |
750 | | - |
| 806 | + |
751 | 807 | /** Make term from <code>buffer</code> */ |
752 | | - private Term makeTerm(){ |
753 | | - return makeTerm(new String(buffer,0,length)); |
| 808 | + private Term makeTerm() { |
| 809 | + return makeTerm(new String(buffer, 0, length)); |
754 | 810 | } |
755 | | - |
| 811 | + |
756 | 812 | /** Make a lucene term from string */ |
757 | | - private Term makeTerm(String t){ |
758 | | - |
759 | | - |
760 | | - if(currentField == null) |
761 | | - return new Term(defaultField,builder.isExactCase()? t : t.toLowerCase()); |
762 | | - else if(defaultField.equals("contents") && isInTitle) |
763 | | - return new Term("title",builder.isExactCase()? t : t.toLowerCase()); |
764 | | - else if(currentField.equals("incategory")){ |
765 | | - String norm = t.replace("_"," "); // bug 10822 |
766 | | - return new Term("category",builder.isExactCase()? norm : norm.toLowerCase()); |
767 | | - } else if( keywordFieldMapping.containsKey(currentField) ) { |
| 813 | + private Term makeTerm(String t) { |
| 814 | + |
| 815 | + if (currentField == null) |
| 816 | + return new Term(defaultField, builder.isExactCase() ? t |
| 817 | + : t.toLowerCase()); |
| 818 | + else if (defaultField.equals("contents") && isInTitle) |
| 819 | + return new Term("title", builder.isExactCase() ? t |
| 820 | + : t.toLowerCase()); |
| 821 | + else if (currentField.equals("incategory")) { |
| 822 | + String norm = t.replace("_", " "); // bug 10822 |
| 823 | + return new Term("category", builder.isExactCase() ? norm |
| 824 | + : norm.toLowerCase()); |
| 825 | + } else if (keywordFieldMapping.containsKey(currentField)) { |
768 | 826 | String field = keywordFieldMapping.get(currentField); |
769 | | - |
| 827 | + |
770 | 828 | return new Term(field, t); |
771 | | - } else if(!"incategory".equals(currentField) && |
772 | | - (namespacePolicy == NamespacePolicy.IGNORE || |
773 | | - namespacePolicy == NamespacePolicy.REWRITE)) |
774 | | - return new Term(defaultField,t); |
| 829 | + } else if (!"incategory".equals(currentField) |
| 830 | + && (namespacePolicy == NamespacePolicy.IGNORE || namespacePolicy == NamespacePolicy.REWRITE)) |
| 831 | + return new Term(defaultField, t); |
775 | 832 | else |
776 | | - return new Term(currentField,t); |
| 833 | + return new Term(currentField, t); |
777 | 834 | } |
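
`makeTerm()` is where per-field quirks are applied: `incategory` terms land on the `category` field with underscores turned into spaces (bug 10822), the keyword operators registered in the constructor (`inthread`, `ondiscussionpage`) map onto their raw fields with case preserved, and everything else falls back to the default field, lowercased unless the builder is exact-case. Illustrative terms only, with invented values:

```java
// incategory:"Living_people"   ->  category:"living people"
Term category = new Term("category", "living people");
// ondiscussionpage:Foo         ->  ThreadPage:Foo   (keyword mapping, case kept)
Term threadPage = new Term("ThreadPage", "Foo");
// plain word under the default field, lowercased unless exact-case
Term contents = new Term("contents", "foo");
```
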
778 | | - |
779 | | - /** |
| 835 | + |
| 836 | + /** |
780 | 837 | * Parses a phrase query (i.e. between ""), the cur |
781 | | - * should be set to the char just after the first |
782 | | - * quotation mark |
783 | | - * |
| 838 | + * should be set to the char just after the first |
| 839 | + * quotation mark |
| 840 | + * |
784 | 841 | * @return a query, or null if the query is empty |
785 | 842 | */ |
786 | | - private Query parsePhrase(){ |
787 | | - // special case for incategory |
788 | | - if(currentField!=null && currentField.equals("incategory")){ |
| 843 | + private Query parsePhrase() { |
| 844 | + // special case for incategory |
| 845 | + if (currentField != null && currentField.equals("incategory")) { |
789 | 846 | length = 0; |
790 | | - for(; cur < queryLength ; cur++ ){ |
791 | | - if(text[cur] == '"') |
| 847 | + for (; cur < queryLength; cur++) { |
| 848 | + if (text[cur] == '"') |
792 | 849 | break; |
793 | | - else if(length < buffer.length) |
| 850 | + else if (length < buffer.length) |
794 | 851 | buffer[length++] = text[cur]; |
795 | 852 | } |
796 | | - if(length > 0){ |
| 853 | + if (length > 0) { |
797 | 854 | // no tokenization, we want whole category name |
798 | 855 | return new TermQuery(makeTerm()); |
799 | 856 | } |
800 | 857 | return null; |
801 | | - } |
802 | | - //PositionalMultiQuery query = new PositionalMultiQuery(new PositionalOptions.PhraseQueryFallback()); |
| 858 | + } |
| 859 | + // PositionalMultiQuery query = new PositionalMultiQuery(new |
| 860 | + // PositionalOptions.PhraseQueryFallback()); |
803 | 861 | MultiPhraseQuery query = new MultiPhraseQuery(); |
804 | | - for(; cur < queryLength ; cur++ ){ |
| 862 | + for (; cur < queryLength; cur++) { |
805 | 863 | length = 0; |
806 | 864 | // fetch next word |
807 | | - while(cur<queryLength && isTermChar(text[cur]) && length<buffer.length){ |
| 865 | + while (cur < queryLength && isTermChar(text[cur]) |
| 866 | + && length < buffer.length) { |
808 | 867 | buffer[length++] = text[cur++]; |
809 | 868 | } |
810 | | - |
| 869 | + |
811 | 870 | // add to phrase |
812 | | - if(length > 0){ |
| 871 | + if (length > 0) { |
813 | 872 | boolean added = false; |
814 | | - if(bufferIsWildCard()){ |
| 873 | + if (bufferIsWildCard()) { |
815 | 874 | Term term = makeTerm(); |
816 | | - Term[] terms = wildcards.makeTerms(term.text(),term.field()); |
817 | | - if(terms != null){ |
| 875 | + Term[] terms = wildcards.makeTerms(term.text(), |
| 876 | + term.field()); |
| 877 | + if (terms != null) { |
818 | 878 | query.add(terms); |
819 | | - ArrayList<String> words = wildcards.getWords(term.text()); |
820 | | - parsedWords.add(term.text(),words,1f,ExpandedType.WILDCARD); |
| 879 | + ArrayList<String> words = wildcards.getWords(term |
| 880 | + .text()); |
| 881 | + parsedWords.add(term.text(), words, 1f, |
| 882 | + ExpandedType.WILDCARD); |
821 | 883 | added = true; |
822 | 884 | } |
823 | 885 | } |
824 | | - if(bufferIsFuzzy()){ |
| 886 | + if (bufferIsFuzzy()) { |
825 | 887 | Term term = makeTerm(); |
826 | 888 | NamespaceFilter nsf = getNamespaceFilter(currentField); |
827 | | - Term[] terms = fuzzy.makeTerms(term.text(),term.field(),nsf); |
828 | | - if(terms != null){ |
829 | | - //query.add(terms,fuzzy.getBoosts(term.text(),nsf,terms)); |
| 889 | + Term[] terms = fuzzy.makeTerms(term.text(), term.field(), |
| 890 | + nsf); |
| 891 | + if (terms != null) { |
| 892 | + // query.add(terms,fuzzy.getBoosts(term.text(),nsf,terms)); |
830 | 893 | query.add(terms); |
831 | | - ArrayList<String> words = fuzzy.getWords(term.text(),nsf); |
832 | | - parsedWords.add(term.text(),words,fuzzy.getBoosts(term.text(),nsf,words),ExpandedType.FUZZY); |
| 894 | + ArrayList<String> words = fuzzy.getWords(term.text(), |
| 895 | + nsf); |
| 896 | + parsedWords.add(term.text(), words, |
| 897 | + fuzzy.getBoosts(term.text(), nsf, words), |
| 898 | + ExpandedType.FUZZY); |
833 | 899 | added = true; |
834 | 900 | } |
835 | 901 | } |
836 | | - if(!added){ |
| 902 | + if (!added) { |
837 | 903 | // fallback to ordinary words |
838 | 904 | analyzeBuffer(); |
839 | | - for(Token token : tokens){ |
840 | | - if(token.getPositionIncrement()>0){ // ignore aliases and stemmed words |
| 905 | + for (Token token : tokens) { |
| 906 | + if (token.getPositionIncrement() > 0) { // ignore |
| 907 | + // aliases and |
| 908 | + // stemmed words |
841 | 909 | Term t = makeTerm(token); |
842 | | - addToWords(t,1,ExpandedType.PHRASE); |
| 910 | + addToWords(t, 1, ExpandedType.PHRASE); |
843 | 911 | query.add(t); |
844 | 912 | } |
845 | | - } |
| 913 | + } |
846 | 914 | } |
847 | | - } |
| 915 | + } |
848 | 916 | // end of phrase query |
849 | | - if(cur < queryLength && text[cur] == '"') |
| 917 | + if (cur < queryLength && text[cur] == '"') |
850 | 918 | break; |
851 | 919 | } |
852 | | - if(query.getPositions().length > 0){ |
| 920 | + if (query.getPositions().length > 0) { |
853 | 921 | query.setBoost(defaultBoost); |
854 | 922 | return query; |
855 | 923 | } else |
856 | 924 | return null; |
857 | 925 | } |
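
For a quoted phrase with no wildcard or fuzzy terms, the fallback branch above adds one term per analyzed token, so the result is equivalent to building the `MultiPhraseQuery` (the same class the method instantiates) by hand. A minimal sketch; the field name and words are illustrative:

```java
// roughly what parsePhrase() produces for  "free software"  on the contents field
MultiPhraseQuery phrase = new MultiPhraseQuery();
phrase.add(new Term("contents", "free"));
phrase.add(new Term("contents", "software"));
phrase.setBoost(1f); // defaultBoost
```
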
858 | | - |
859 | | - final private Query parseClause(int level){ |
860 | | - return parseClause(level,false,null); |
| 926 | + |
| 927 | + final private Query parseClause(int level) { |
| 928 | + return parseClause(level, false, null); |
861 | 929 | } |
862 | | - |
863 | | - private final boolean needsRewrite(){ |
864 | | - return namespaceRewriteQuery != null && namespacePolicy == NamespacePolicy.REWRITE; |
| 930 | + |
| 931 | + private final boolean needsRewrite() { |
| 932 | + return namespaceRewriteQuery != null |
| 933 | + && namespacePolicy == NamespacePolicy.REWRITE; |
865 | 934 | } |
866 | | - |
867 | | - /** Parses a clause: (in regexp-like notation) |
| 935 | + |
| 936 | + /** |
| 937 | + * Parses a clause: (in regexp-like notation) |
| 938 | + * Clause := ([+-]? (<field>:)? <term> | [AND,OR] | \( Clause \) )+ |
868 | 939 | * |
869 | | - * Clause := ([+-]? (<field>:)? <term> | [AND,OR] | \( Clause \) )+ |
870 | | - * |
871 | | - * @param level - level of recurstion |
872 | | - * @param returnOnFieldDef - if this is a nested field rewrite call |
| 940 | + * @param level |
 | 941 | +	 *            - level of recursion |
| 942 | + * @param returnOnFieldDef |
| 943 | + * - if this is a nested field rewrite call |
873 | 944 | * @return |
874 | 945 | */ |
875 | | - private Query parseClause(int level, boolean returnOnFieldDef, String topFieldName){ |
| 946 | + private Query parseClause(int level, boolean returnOnFieldDef, |
| 947 | + String topFieldName) { |
876 | 948 | // the whole query |
877 | | - Query query = null; |
| 949 | + Query query = null; |
878 | 950 | // reference to boolean query if one is constructed |
879 | 951 | BooleanQuery boolquery = null; |
880 | 952 | BooleanClause.Occur occur = boolDefault; |
881 | 953 | // the first query |
882 | | - BooleanClause.Occur firstOccur = boolDefault; |
| 954 | + BooleanClause.Occur firstOccur = boolDefault; |
883 | 955 | // state |
884 | 956 | TokenType tokenType; |
885 | | - Query subquery = null; |
| 957 | + Query subquery = null; |
886 | 958 | boolean definedField = false; |
887 | 959 | boolean definedExplicitField = false; |
888 | 960 | Query fieldQuery = null; // the namespace term, e.g. namespace:0 |
889 | | - Query fieldsubquery = null; // e.g. 'all:something else' will be parsed 'something else' |
890 | | - |
| 961 | + Query fieldsubquery = null; // e.g. 'all:something else' will be parsed |
| 962 | + // 'something else' |
| 963 | + |
891 | 964 | // assume default namespace value on rewrite |
892 | | - if(!returnOnFieldDef && currentField == null && needsRewrite()){ |
893 | | - fieldQuery = namespaceRewriteQuery; |
| 965 | + if (!returnOnFieldDef && currentField == null && needsRewrite()) { |
| 966 | + fieldQuery = namespaceRewriteQuery; |
894 | 967 | } |
895 | | - |
896 | | - mainloop: for( ; cur < queryLength; cur++ ){ |
| 968 | + |
| 969 | + mainloop: for (; cur < queryLength; cur++) { |
897 | 970 | c = text[cur]; |
898 | | - |
899 | | - if(c == ' ') |
| 971 | + |
| 972 | + if (c == ' ') |
900 | 973 | continue; |
901 | | - |
| 974 | + |
902 | 975 | // terms, fields |
903 | | - if(Character.isLetterOrDigit(c) || c=='.' || c == '[' || c=='*'){ |
| 976 | + if (Character.isLetterOrDigit(c) || c == '.' || c == '[' |
| 977 | + || c == '*') { |
904 | 978 | // check for generic namespace prefixes, e.g. [0,1]: |
905 | | - if(c == '['){ |
906 | | - if(fetchGenericPrefix()) |
| 979 | + if (c == '[') { |
| 980 | + if (fetchGenericPrefix()) |
907 | 981 | tokenType = TokenType.FIELD; |
908 | 982 | else |
909 | 983 | continue; |
910 | | - } else // fetch next token |
| 984 | + } else |
| 985 | + // fetch next token |
911 | 986 | tokenType = fetchToken(); |
912 | | - |
913 | | - switch(tokenType){ |
| 987 | + |
| 988 | + switch (tokenType) { |
914 | 989 | case FIELD: |
915 | 990 | // this is where the function returns if called from the |
916 | 991 | // next if (i.e. some 10 lines down) |
917 | | - if(returnOnFieldDef){ |
918 | | - String newfield = new String(buffer,0,length); |
919 | | - if(!newfield.equals("incategory") && !newfield.equals(topFieldName)){ |
920 | | - backToken(); cur--; |
| 992 | + if (returnOnFieldDef) { |
| 993 | + String newfield = new String(buffer, 0, length); |
| 994 | + if (!newfield.equals("incategory") |
| 995 | + && !newfield.equals(topFieldName)) { |
| 996 | + backToken(); |
| 997 | + cur--; |
921 | 998 | break mainloop; |
922 | 999 | } |
923 | 1000 | } |
924 | | - if(currentField == null || definedExplicitField){ |
| 1001 | + if (currentField == null || definedExplicitField) { |
925 | 1002 | // set field name |
926 | | - currentField = new String(buffer,0,length); |
927 | | - if("intitle".equals(currentField)){ |
| 1003 | + currentField = new String(buffer, 0, length); |
| 1004 | + if ("intitle".equals(currentField)) { |
928 | 1005 | isInTitle = true; |
929 | 1006 | isInTitleLevel = level; |
930 | 1007 | } |
931 | | - if((defaultNamespaceName!=null && currentField.equals(defaultNamespaceName)) || currentField.equals(defaultField)){ |
| 1008 | + if ((defaultNamespaceName != null && currentField |
| 1009 | + .equals(defaultNamespaceName)) |
| 1010 | + || currentField.equals(defaultField)) { |
932 | 1011 | currentField = null; |
933 | 1012 | break; // repeated definition of field, ignore |
934 | 1013 | } |
935 | 1014 | definedExplicitField = true; |
936 | | - |
937 | | - fieldQuery = getNamespaceQuery(currentField); // depending on policy rewrite this field |
938 | | - if(fieldQuery != null){ |
939 | | - // save field, we will need it to be set to null to fetch categories |
| 1015 | + |
| 1016 | + fieldQuery = getNamespaceQuery(currentField); // depending |
| 1017 | + // on |
| 1018 | + // policy |
| 1019 | + // rewrite |
| 1020 | + // this |
| 1021 | + // field |
| 1022 | + if (fieldQuery != null) { |
| 1023 | + // save field, we will need it to be set to null to |
| 1024 | + // fetch categories |
940 | 1025 | String myfield = currentField; |
941 | 1026 | currentField = null; |
942 | 1027 | // fetch the clause until the next field |
943 | | - fieldsubquery = parseClause(level+1,true,myfield); |
| 1028 | + fieldsubquery = parseClause(level + 1, true, |
| 1029 | + myfield); |
944 | 1030 | currentField = myfield; |
945 | 1031 | } |
946 | | - } else{ |
| 1032 | + } else { |
947 | 1033 | // nested field names, don't allow, just add to query |
948 | 1034 | analyzeBuffer(); |
949 | 1035 | subquery = makeQueryFromTokens(occur); |
950 | 1036 | } |
951 | 1037 | break; |
952 | 1038 | case WORD: |
953 | | - if(fieldQuery != null){ |
| 1039 | + if (fieldQuery != null) { |
954 | 1040 | backToken(); |
955 | | - String myfield = (topFieldName != null)? topFieldName : (currentField !=null)? currentField : (defaultNamespaceName!=null)? defaultNamespaceName : defaultField; |
956 | | - fieldsubquery = parseClause(level+1,true,myfield); |
957 | | - } else{ |
| 1041 | + String myfield = (topFieldName != null) ? topFieldName |
| 1042 | + : (currentField != null) ? currentField |
| 1043 | + : (defaultNamespaceName != null) ? defaultNamespaceName |
| 1044 | + : defaultField; |
| 1045 | + fieldsubquery = parseClause(level + 1, true, myfield); |
| 1046 | + } else { |
958 | 1047 | analyzeBuffer(); |
959 | | - subquery = makeQueryFromTokens(explicitOccur!=null? explicitOccur : occur); |
| 1048 | + subquery = makeQueryFromTokens(explicitOccur != null ? explicitOccur |
| 1049 | + : occur); |
960 | 1050 | } |
961 | 1051 | break; |
962 | 1052 | case AND: |
963 | 1053 | firstOccur = BooleanClause.Occur.MUST; |
964 | 1054 | occur = BooleanClause.Occur.MUST; |
965 | | - if(returnOnFieldDef) |
| 1055 | + if (returnOnFieldDef) |
966 | 1056 | explicitOccur = BooleanClause.Occur.MUST; |
967 | 1057 | continue; |
968 | 1058 | case OR: |
969 | 1059 | firstOccur = BooleanClause.Occur.SHOULD; |
970 | 1060 | occur = BooleanClause.Occur.SHOULD; |
971 | | - if(returnOnFieldDef) |
| 1061 | + if (returnOnFieldDef) |
972 | 1062 | explicitOccur = BooleanClause.Occur.SHOULD; |
973 | 1063 | continue; |
974 | 1064 | case EOF: |
975 | | - break mainloop; |
976 | | - } |
| 1065 | + break mainloop; |
| 1066 | + } |
977 | 1067 | } |
978 | | - |
| 1068 | + |
979 | 1069 | // field subquery, the fetched clause while doing rewriting |
980 | | - if(fieldsubquery != null){ |
| 1070 | + if (fieldsubquery != null) { |
981 | 1071 | // this is not the first field definition at this level
982 | | - if(definedField){ |
| 1072 | + if (definedField) { |
983 | 1073 | // embed the old query |
984 | 1074 | BooleanQuery bq = new BooleanQuery(); |
985 | | - bq.add(query,BooleanClause.Occur.SHOULD); |
| 1075 | + bq.add(query, BooleanClause.Occur.SHOULD); |
986 | 1076 | query = boolquery = bq; |
987 | 1077 | } |
988 | | - |
| 1078 | + |
989 | 1079 | BooleanQuery bq = new BooleanQuery(); |
990 | | - bq.add(fieldQuery,BooleanClause.Occur.MUST); |
991 | | - bq.add(fieldsubquery,BooleanClause.Occur.MUST); |
992 | | - |
| 1080 | + bq.add(fieldQuery, BooleanClause.Occur.MUST); |
| 1081 | + bq.add(fieldsubquery, BooleanClause.Occur.MUST); |
| 1082 | + |
993 | 1083 | // add to existing queries |
994 | | - if(boolquery != null) |
995 | | - boolquery.add(bq,BooleanClause.Occur.SHOULD); |
996 | | - else if(query != null){ |
| 1084 | + if (boolquery != null) |
| 1085 | + boolquery.add(bq, BooleanClause.Occur.SHOULD); |
| 1086 | + else if (query != null) { |
997 | 1087 | boolquery = new BooleanQuery(); |
998 | | - boolquery.add(query,firstOccur); |
999 | | - boolquery.add(bq,BooleanClause.Occur.SHOULD); |
| 1088 | + boolquery.add(query, firstOccur); |
| 1089 | + boolquery.add(bq, BooleanClause.Occur.SHOULD); |
1000 | 1090 | query = boolquery; |
1001 | 1091 | } else |
1002 | 1092 | query = bq; |
1003 | | - |
| 1093 | + |
1004 | 1094 | fieldQuery = null; |
1005 | 1095 | definedField = true; |
1006 | 1096 | fieldsubquery = null; |
1007 | 1097 | } |
1008 | | - |
| 1098 | + |
1009 | 1099 | // modifiers |
1010 | | - switch(c){ |
| 1100 | + switch (c) { |
1011 | 1101 | case '+': |
1012 | 1102 | occur = BooleanClause.Occur.MUST; |
1013 | | - if(returnOnFieldDef) |
1014 | | - explicitOccur = BooleanClause.Occur.MUST; |
| 1103 | + if (returnOnFieldDef) |
| 1104 | + explicitOccur = BooleanClause.Occur.MUST; |
1015 | 1105 | continue; |
1016 | 1106 | case '-': |
1017 | 1107 | occur = BooleanClause.Occur.MUST_NOT; |
1018 | | - if(returnOnFieldDef) |
| 1108 | + if (returnOnFieldDef) |
1019 | 1109 | explicitOccur = BooleanClause.Occur.MUST_NOT; |
1020 | 1110 | continue; |
1021 | 1111 | case '"': |
— | — | @@ -1023,12 +1113,12 @@ |
1024 | 1114 | break; |
1025 | 1115 | case '(': |
1026 | 1116 | cur++; |
1027 | | - subquery = parseClause(level+1); |
| 1117 | + subquery = parseClause(level + 1); |
1028 | 1118 | break; |
1029 | 1119 | case ')': |
1030 | | - if(level > 0){ |
| 1120 | + if (level > 0) { |
1031 | 1121 | // get out of titles on appropriate level of parenthesis |
1032 | | - if(isInTitle && level <= isInTitleLevel) |
| 1122 | + if (isInTitle && level <= isInTitleLevel) |
1033 | 1123 | isInTitle = false; |
1034 | 1124 | break mainloop; |
1035 | 1125 | } |
— | — | @@ -1036,23 +1126,22 @@ |
1037 | 1127 | } |
1038 | 1128 | |
1039 | 1129 | // if we fetched some tokens or a subquery add it to main query |
1040 | | - if(subquery != null){ |
1041 | | - if(query == null){ |
| 1130 | + if (subquery != null) { |
| 1131 | + if (query == null) { |
1042 | 1132 | query = subquery; |
1043 | 1133 | firstOccur = occur; // save the boolean modifier |
1044 | 1134 | occur = boolDefault; // return to default |
1045 | | - } |
1046 | | - else{ |
1047 | | - if(explicitOccur != null) |
| 1135 | + } else { |
| 1136 | + if (explicitOccur != null) |
1048 | 1137 | occur = explicitOccur; |
1049 | | - if(boolquery == null){ |
| 1138 | + if (boolquery == null) { |
1050 | 1139 | // we have found the second term, make boolean query |
1051 | 1140 | boolquery = new BooleanQuery(); |
1052 | | - boolquery.add(query,firstOccur); |
1053 | | - boolquery.add(subquery,occur); |
| 1141 | + boolquery.add(query, firstOccur); |
| 1142 | + boolquery.add(subquery, occur); |
1054 | 1143 | query = boolquery; |
1055 | | - } else{ |
1056 | | - boolquery.add(subquery,occur); |
| 1144 | + } else { |
| 1145 | + boolquery.add(subquery, occur); |
1057 | 1146 | } |
1058 | 1147 | occur = boolDefault; // return to default |
1059 | 1148 | explicitOccur = null; |
— | — | @@ -1060,193 +1149,199 @@ |
1061 | 1150 | subquery = null; |
1062 | 1151 | } |
1063 | 1152 | } |
1064 | | - |
1065 | | - if(definedExplicitField) |
| 1153 | + |
| 1154 | + if (definedExplicitField) |
1066 | 1155 | currentField = null; |
1067 | 1156 | return query; |
1068 | 1157 | } |
1069 | | - |
1070 | | - /** |
| 1158 | + |
| 1159 | + /** |
1071 | 1160 | * return true if buffer is wildcard |
1072 | | - * the only allowed patterns are *q and q* and not other combinations like *q* or q*r |
1073 | | - * |
| 1161 | + * the only allowed patterns are *q and q* and no other combinations like
| 1162 | + * *q* or q*r |
1074 | 1163 | */ |
1075 | | - private boolean bufferIsWildCard(){ |
1076 | | - if(length < 2) |
| 1164 | + private boolean bufferIsWildCard() { |
| 1165 | + if (length < 2) |
1077 | 1166 | return false; |
1078 | 1167 | boolean wild = false; |
1079 | 1168 | int index = -1; |
1080 | 1169 | // only allow '*' at the beginning and end
1081 | | - if(buffer[0] == '*'){ |
| 1170 | + if (buffer[0] == '*') { |
1082 | 1171 | index = 0; |
1083 | 1172 | wild = true; |
1084 | | - } else if( buffer[length-1] == '*' ){ |
1085 | | - index = length-1; |
| 1173 | + } else if (buffer[length - 1] == '*') { |
| 1174 | + index = length - 1; |
1086 | 1175 | wild = true; |
1087 | 1176 | } |
1088 | 1177 | |
1089 | 1178 | // check if it's a valid wildcard |
1090 | | - if(wild){ |
| 1179 | + if (wild) { |
1091 | 1180 | // check if this is the only asterix |
1092 | | - for(int i=0;i<length;i++){ |
1093 | | - if( i!= index && buffer[i] == '*'){ |
| 1181 | + for (int i = 0; i < length; i++) { |
| 1182 | + if (i != index && buffer[i] == '*') { |
1094 | 1183 | return false; // more than one '*' |
1095 | 1184 | } |
1096 | 1185 | } |
1097 | | - |
| 1186 | + |
1098 | 1187 | // require at least one letter besides the wildcard sign |
1099 | | - for(int i=0;i<length;i++){ |
1100 | | - if(Character.isLetterOrDigit(buffer[i])) |
| 1188 | + for (int i = 0; i < length; i++) { |
| 1189 | + if (Character.isLetterOrDigit(buffer[i])) |
1101 | 1190 | return true; // found it! |
1102 | 1191 | } |
1103 | 1192 | } |
1104 | 1193 | return false; |
1105 | 1194 | } |
1106 | | - |
1107 | | - private boolean bufferIsFuzzy(){ |
1108 | | - return length>1 && (buffer[0]=='~' || buffer[length-1]=='~'); |
| 1195 | + |
| 1196 | + private boolean bufferIsFuzzy() { |
| 1197 | + return length > 1 && (buffer[0] == '~' || buffer[length - 1] == '~'); |
1109 | 1198 | } |
1110 | | - |
1111 | | - private boolean bufferContains(char c){ |
1112 | | - for(int i=0;i<length;i++){ |
1113 | | - if(buffer[i] == c) |
| 1199 | + |
| 1200 | + private boolean bufferContains(char c) { |
| 1201 | + for (int i = 0; i < length; i++) { |
| 1202 | + if (buffer[i] == c) |
1114 | 1203 | return true; |
1115 | 1204 | } |
1116 | 1205 | return false; |
1117 | 1206 | } |
1118 | | - |
1119 | | - private void addToWords(Term t){ |
1120 | | - addToWords(t,1,ExpandedType.WORD); |
| 1207 | + |
| 1208 | + private void addToWords(Term t) { |
| 1209 | + addToWords(t, 1, ExpandedType.WORD); |
1121 | 1210 | } |
1122 | | - private void addToWords(Term t, float boost, ExpandedType type){ |
1123 | | - parsedWords.add(t.text(),t.text(),boost,type); |
| 1211 | + |
| 1212 | + private void addToWords(Term t, float boost, ExpandedType type) { |
| 1213 | + parsedWords.add(t.text(), t.text(), boost, type); |
1124 | 1214 | } |
1125 | | - |
1126 | | - private void addToWordsAsAlias(Token t){ |
| 1215 | + |
| 1216 | + private void addToWordsAsAlias(Token t) { |
1127 | 1217 | float boost = STEM_WORD_BOOST; |
1128 | | - if(t.type().equals("singular")) |
| 1218 | + if (t.type().equals("singular")) |
1129 | 1219 | boost = SINGULAR_WORD_BOOST; |
1130 | | - parsedWords.last().add(new WordBoost(t.termText(),boost)); |
| 1220 | + parsedWords.last().add(new WordBoost(t.termText(), boost)); |
1131 | 1221 | } |
1132 | | - |
1133 | | - /** |
| 1222 | + |
| 1223 | + /** |
1134 | 1224 | * Constructs either a termquery or a boolean query depending on |
1135 | 1225 | * analysis of the fetched token. A single "word" might be analyzed |
1136 | | - * into many tokens, and some of them might be aliases |
| 1226 | + * into many tokens, and some of them might be aliases |
| 1227 | + * |
1137 | 1228 | * @return |
1138 | 1229 | */ |
1139 | | - private Query makeQueryFromTokens(BooleanClause.Occur toplevelOccur){ |
| 1230 | + private Query makeQueryFromTokens(BooleanClause.Occur toplevelOccur) { |
1140 | 1231 | BooleanQuery bq = null; |
1141 | 1232 | TermQuery t; |
1142 | 1233 | boolean addAliases = true; |
1143 | | - |
| 1234 | + |
1144 | 1235 | // check for urls |
1145 | | - Matcher urlMatcher = urlPattern.matcher(new String(buffer,0,length)); |
1146 | | - while(bufferContains('.') && urlMatcher.find()){ |
| 1236 | + Matcher urlMatcher = urlPattern.matcher(new String(buffer, 0, length)); |
| 1237 | + while (bufferContains('.') && urlMatcher.find()) { |
1147 | 1238 | ArrayList<Token> urlTokens = analyzeString(urlMatcher.group()); |
1148 | 1239 | ArrayList<Term> urlTerms = new ArrayList<Term>(); |
1149 | | - for(Token tt : urlTokens) |
| 1240 | + for (Token tt : urlTokens) |
1150 | 1241 | urlTerms.add(makeTerm(tt.termText())); |
1151 | | - urls.add(urlTerms); |
| 1242 | + urls.add(urlTerms); |
1152 | 1243 | } |
1153 | | - |
| 1244 | + |
1154 | 1245 | // categories should not be analyzed |
1155 | | - if(currentField != null && currentField.equals("incategory")){ |
| 1246 | + if (currentField != null && currentField.equals("incategory")) { |
1156 | 1247 | return new TermQuery(makeTerm()); |
1157 | 1248 | } |
1158 | | - |
1159 | | - // check for wildcard seaches, they are also not analyzed/stemmed, only for titles |
1160 | | - // wildcard signs are allowed only at the end of the word, minimum one letter word |
1161 | | - if(length>1 && wildcards != null && bufferIsWildCard()){ |
1162 | | - Term term = makeTerm(); |
1163 | | - Query ret = wildcards.makeQuery(term.text(),term.field()); |
1164 | | - if(ret != null){ |
| 1249 | + |
| 1250 | + // check for wildcard searches, they are also not analyzed/stemmed, only
| 1251 | + // for titles |
| 1252 | + // wildcard signs are allowed only at the end of the word, minimum one |
| 1253 | + // letter word |
| 1254 | + if (length > 1 && wildcards != null && bufferIsWildCard()) { |
| 1255 | + Term term = makeTerm(); |
| 1256 | + Query ret = wildcards.makeQuery(term.text(), term.field()); |
| 1257 | + if (ret != null) { |
1165 | 1258 | ArrayList<String> words = wildcards.getWords(term.text()); |
1166 | | - parsedWords.add(term.text(),words,1,ExpandedType.WILDCARD); |
| 1259 | + parsedWords.add(term.text(), words, 1, ExpandedType.WILDCARD); |
1167 | 1260 | ret.setBoost(WILDCARD_BOOST); |
1168 | 1261 | return ret; |
1169 | | - } else{ |
| 1262 | + } else { |
1170 | 1263 | // something is wrong, try making normal query |
1171 | 1264 | addToWords(term); |
1172 | 1265 | return new TermQuery(term); |
1173 | 1266 | } |
1174 | 1267 | } |
1175 | 1268 | // parse fuzzy queries |
1176 | | - if(length>1 && fuzzy != null && bufferIsFuzzy()){ |
| 1269 | + if (length > 1 && fuzzy != null && bufferIsFuzzy()) { |
1177 | 1270 | Term term = makeTerm(); |
1178 | | - String termText = term.text().replaceAll("~",""); |
| 1271 | + String termText = term.text().replaceAll("~", ""); |
1179 | 1272 | NamespaceFilter nsf = getNamespaceFilter(currentField); |
1180 | | - Query ret = fuzzy.makeQuery(termText,term.field(),nsf); |
1181 | | - if(ret != null){ |
1182 | | - ArrayList<String> words = fuzzy.getWords(termText,nsf); |
1183 | | - parsedWords.add(term.text(),words,fuzzy.getBoosts(termText,nsf,words),ExpandedType.FUZZY); |
| 1273 | + Query ret = fuzzy.makeQuery(termText, term.field(), nsf); |
| 1274 | + if (ret != null) { |
| 1275 | + ArrayList<String> words = fuzzy.getWords(termText, nsf); |
| 1276 | + parsedWords.add(term.text(), words, |
| 1277 | + fuzzy.getBoosts(termText, nsf, words), |
| 1278 | + ExpandedType.FUZZY); |
1184 | 1279 | ret.setBoost(FUZZY_BOOST); |
1185 | 1280 | return ret; |
1186 | 1281 | } |
1187 | 1282 | } |
1188 | | - |
1189 | | - if(toplevelOccur == BooleanClause.Occur.MUST_NOT) |
| 1283 | + |
| 1284 | + if (toplevelOccur == BooleanClause.Occur.MUST_NOT) |
1190 | 1285 | addAliases = false; |
1191 | 1286 | |
1192 | | - if(tokens.size() == 1){ |
| 1287 | + if (tokens.size() == 1) { |
1193 | 1288 | t = new TermQuery(makeTerm(tokens.get(0))); |
1194 | 1289 | t.setBoost(defaultBoost); |
1195 | | - if(toplevelOccur != Occur.MUST_NOT) |
| 1290 | + if (toplevelOccur != Occur.MUST_NOT) |
1196 | 1291 | addToWords(t.getTerm()); |
1197 | 1292 | return t; |
1198 | | - } else{ |
| 1293 | + } else { |
1199 | 1294 | // make a nested boolean query |
1200 | 1295 | ArrayList<BooleanQuery> queries = new ArrayList<BooleanQuery>(); |
1201 | 1296 | ArrayList<Token> aliases = new ArrayList<Token>(); |
1202 | | - for(int i=0; i<tokens.size(); i++){ |
| 1297 | + for (int i = 0; i < tokens.size(); i++) { |
1203 | 1298 | BooleanQuery query = new BooleanQuery(); |
1204 | 1299 | // main token |
1205 | 1300 | Token token = tokens.get(i); |
1206 | 1301 | t = new TermQuery(makeTerm(token)); |
1207 | 1302 | t.setBoost(defaultBoost); |
1208 | | - if(toplevelOccur != Occur.MUST_NOT) |
| 1303 | + if (toplevelOccur != Occur.MUST_NOT) |
1209 | 1304 | addToWords(t.getTerm()); |
1210 | | - query.add(t,Occur.SHOULD); |
| 1305 | + query.add(t, Occur.SHOULD); |
1211 | 1306 | // group aliases together |
1212 | 1307 | aliases.clear(); |
1213 | | - for(int j=i+1;j<tokens.size();j++){ |
1214 | | - if(tokens.get(j).getPositionIncrement() == 0){ |
| 1308 | + for (int j = i + 1; j < tokens.size(); j++) { |
| 1309 | + if (tokens.get(j).getPositionIncrement() == 0) { |
1215 | 1310 | aliases.add(tokens.get(j)); |
1216 | 1311 | i = j; |
1217 | 1312 | } else |
1218 | 1313 | break; |
1219 | | - } |
1220 | | - if(addAliases){ |
1221 | | - for(Token alias : aliases){ |
| 1314 | + } |
| 1315 | + if (addAliases) { |
| 1316 | + for (Token alias : aliases) { |
1222 | 1317 | t = new TermQuery(makeTerm(alias)); |
1223 | | - t.setBoost(defaultAliasBoost*defaultBoost); |
1224 | | - query.add(t,Occur.SHOULD); |
| 1318 | + t.setBoost(defaultAliasBoost * defaultBoost); |
| 1319 | + query.add(t, Occur.SHOULD); |
1225 | 1320 | addToWordsAsAlias(alias); |
1226 | 1321 | } |
1227 | 1322 | } |
1228 | 1323 | queries.add(query); |
1229 | 1324 | } |
1230 | 1325 | // don't return nested if only one query
1231 | | - if(queries.size() == 1){ |
1232 | | - BooleanQuery q = (BooleanQuery)queries.get(0); |
| 1326 | + if (queries.size() == 1) { |
| 1327 | + BooleanQuery q = (BooleanQuery) queries.get(0); |
1233 | 1328 | // one nested clause |
1234 | | - if(q.getClauses().length == 1) |
| 1329 | + if (q.getClauses().length == 1) |
1235 | 1330 | return q.getClauses()[0].getQuery(); |
1236 | 1331 | return queries.get(0); |
1237 | 1332 | } |
1238 | 1333 | // multiple tokens, e.g. super-hero -> +super +hero |
1239 | 1334 | bq = new BooleanQuery(); |
1240 | | - for(BooleanQuery q : queries){ |
1241 | | - if(q.getClauses().length == 1) |
1242 | | - bq.add(q.getClauses()[0].getQuery(),boolDefault); |
| 1335 | + for (BooleanQuery q : queries) { |
| 1336 | + if (q.getClauses().length == 1) |
| 1337 | + bq.add(q.getClauses()[0].getQuery(), boolDefault); |
1243 | 1338 | else |
1244 | | - bq.add(q,boolDefault); |
| 1339 | + bq.add(q, boolDefault); |
1245 | 1340 | } |
1246 | 1341 | return bq; |
1247 | | - |
| 1342 | + |
1248 | 1343 | } |
1249 | 1344 | } |
1250 | | - |
| 1345 | + |
1251 | 1346 | /** |
1252 | 1347 | * Extract prefix: field from the query and put it into prefixFilter |
1253 | 1348 | * variable for later retrieval |
— | — | @@ -1254,94 +1349,101 @@ |
1255 | 1350 | * @param queryText |
1256 | 1351 | * @return queryText with prefix part deleted |
1257 | 1352 | */ |
1258 | | - public String extractPrefixFilter(String queryText){ |
| 1353 | + public String extractPrefixFilter(String queryText) { |
1259 | 1354 | this.prefixFilters = null; |
1260 | | - ArrayList<String> filters = new ArrayList<String>(); |
| 1355 | + ArrayList<String> filters = new ArrayList<String>(); |
1261 | 1356 | int start = 0; |
1262 | | - while(start < queryText.length()){ |
1263 | | - int end = indexOf(queryText,'"',start); // begin of phrase |
1264 | | - int inx = queryText.indexOf("prefix:"); |
1265 | | - if(inx >=0 && inx < end){ |
1266 | | - String[] prefixes = queryText.substring(inx+"prefix:".length()).split("\\|"); |
| 1357 | + while (start < queryText.length()) { |
| 1358 | + int end = indexOf(queryText, '"', start); // beginning of phrase
| 1359 | + int inx = queryText.indexOf("prefix:"); |
| 1360 | + if (inx >= 0 && inx < end) { |
| 1361 | + String[] prefixes = queryText.substring( |
| 1362 | + inx + "prefix:".length()).split("\\|"); |
1267 | 1363 | |
1268 | | - for(String prefix : prefixes){ |
| 1364 | + for (String prefix : prefixes) { |
1269 | 1365 | String full = null; |
1270 | | - if(prefix.startsWith("[") && prefix.contains("]:")){ |
| 1366 | + if (prefix.startsWith("[") && prefix.contains("]:")) { |
1271 | 1367 | // convert from [2]:query to 2:query form |
1272 | | - full = prefix.replace("[","").replace("]:",":"); |
1273 | | - } else // default to main namespace |
1274 | | - full = "0:"+prefix ; |
1275 | | - |
| 1368 | + full = prefix.replace("[", "").replace("]:", ":"); |
| 1369 | + } else |
| 1370 | + // default to main namespace |
| 1371 | + full = "0:" + prefix; |
| 1372 | + |
1276 | 1373 | // add lowercase nonempty prefixes |
1277 | | - if(full != null && full.length()>0) |
| 1374 | + if (full != null && full.length() > 0) |
1278 | 1375 | filters.add(full.toLowerCase()); |
1279 | | - |
| 1376 | + |
1280 | 1377 | } |
1281 | | - this.prefixFilters = filters.toArray(new String[]{}); |
| 1378 | + this.prefixFilters = filters.toArray(new String[] {}); |
1282 | 1379 | // return the actual query without prefix |
1283 | | - return queryText.substring(0,inx); |
| 1380 | + return queryText.substring(0, inx); |
1284 | 1381 | } |
1285 | | - start = end+1; |
1286 | | - if(start < queryText.length()){ |
| 1382 | + start = end + 1; |
| 1383 | + if (start < queryText.length()) { |
1287 | 1384 | // skip phrase |
1288 | | - start = indexOf(queryText,'"',start) + 1; |
| 1385 | + start = indexOf(queryText, '"', start) + 1; |
1289 | 1386 | } |
1290 | 1387 | } |
1291 | | - |
| 1388 | + |
1292 | 1389 | return queryText; |
1293 | 1390 | } |
1294 | | - |
| 1391 | + |
1295 | 1392 | /** |
1296 | 1393 | * Extract prefix: field from the query and put it into prefixFilter |
1297 | 1394 | * variable for later retrieval |
1298 | 1395 | * |
1299 | 1396 | * @param queryText |
1300 | | - * @param field (like "ondiscussionthread:") |
| 1397 | + * @param field |
| 1398 | + * (like "ondiscussionthread:") |
1301 | 1399 | * @return [0] - queryText with field part deleted |
1302 | 1400 | * [1] - the field part |
1303 | 1401 | */ |
1304 | | - public static String[] extractRawField(String queryText, String field){ |
1305 | | - ArrayList<String> filters = new ArrayList<String>(); |
| 1402 | + public static String[] extractRawField(String queryText, String field) { |
| 1403 | + ArrayList<String> filters = new ArrayList<String>(); |
1306 | 1404 | int start = 0; |
1307 | | - while(start < queryText.length()){ |
1308 | | - int end = indexOf(queryText,'"',start); // begin of phrase |
1309 | | - int inx = queryText.indexOf(field); |
1310 | | - if(inx >=0 && inx < end){ |
1311 | | - String prefix = queryText.substring(inx+field.length()); |
| 1405 | + while (start < queryText.length()) { |
| 1406 | + int end = indexOf(queryText, '"', start); // beginning of phrase
| 1407 | + int inx = queryText.indexOf(field); |
| 1408 | + if (inx >= 0 && inx < end) { |
| 1409 | + String prefix = queryText.substring(inx + field.length()); |
1312 | 1410 | |
1313 | 1411 | String full = null; |
1314 | | - if(prefix.startsWith("[") && prefix.contains("]:")){ |
| 1412 | + if (prefix.startsWith("[") && prefix.contains("]:")) { |
1315 | 1413 | // convert from [2]:query to 2:query form |
1316 | | - full = prefix.replace("[","").replace("]:",":"); |
1317 | | - } else // default to main namespace |
1318 | | - full = "0:"+prefix ; |
1319 | | - |
| 1414 | + full = prefix.replace("[", "").replace("]:", ":"); |
| 1415 | + } else |
| 1416 | + // default to main namespace |
| 1417 | + full = "0:" + prefix; |
| 1418 | + |
1320 | 1419 | // add lowercase nonempty prefixes |
1321 | | - if(full != null && full.length()>0) |
| 1420 | + if (full != null && full.length() > 0) |
1322 | 1421 | filters.add(full); |
1323 | | - |
1324 | | - return new String[]{ queryText.substring(0,inx), full }; |
1325 | | - |
| 1422 | + |
| 1423 | + return new String[] { queryText.substring(0, inx), full }; |
| 1424 | + |
1326 | 1425 | } |
1327 | | - start = end+1; |
1328 | | - if(start < queryText.length()){ |
| 1426 | + start = end + 1; |
| 1427 | + if (start < queryText.length()) { |
1329 | 1428 | // skip phrase |
1330 | | - start = indexOf(queryText,'"',start) + 1; |
| 1429 | + start = indexOf(queryText, '"', start) + 1; |
1331 | 1430 | } |
1332 | 1431 | } |
1333 | | - |
1334 | | - return new String[]{ queryText, null }; |
| 1432 | + |
| 1433 | + return new String[] { queryText, null }; |
1335 | 1434 | } |
1336 | | - |
1337 | | - /** Like string.indexOf but return end of string instead of -1 when needle is not found */ |
1338 | | - protected static int indexOf(String string, char needle, int start){ |
1339 | | - int inx = string.indexOf(needle,start); |
1340 | | - if(inx == -1) |
| 1435 | + |
| 1436 | + /** |
| 1437 | + * Like string.indexOf but return end of string instead of -1 when needle is |
| 1438 | + * not found |
| 1439 | + */ |
| 1440 | + protected static int indexOf(String string, char needle, int start) { |
| 1441 | + int inx = string.indexOf(needle, start); |
| 1442 | + if (inx == -1) |
1341 | 1443 | return string.length(); |
1342 | 1444 | else |
1343 | 1445 | return inx; |
1344 | 1446 | } |
1345 | | - |
| 1447 | + |
1346 | 1448 | public boolean isDisableTitleAliases() { |
1347 | 1449 | return disableTitleAliases; |
1348 | 1450 | } |
— | — | @@ -1351,183 +1453,207 @@ |
1352 | 1454 | } |
1353 | 1455 | |
1354 | 1456 | /** Reset the parser state */ |
1355 | | - private void reset(){ |
1356 | | - cur = 0; |
| 1457 | + private void reset() { |
| 1458 | + cur = 0; |
1357 | 1459 | length = 0; |
1358 | | - currentField = null; |
| 1460 | + currentField = null; |
1359 | 1461 | prev_cur = 0; |
1360 | 1462 | explicitOccur = null; |
1361 | 1463 | parsedWords = new ParsedWords(); |
1362 | 1464 | urls = new ArrayList<ArrayList<Term>>(); |
1363 | 1465 | isInTitle = false; |
1364 | 1466 | } |
1365 | | - |
1366 | | - /** Init parsing, call this function to parse text */ |
1367 | | - private Query startParsing(){ |
1368 | | - reset(); |
| 1467 | + |
| 1468 | + /** Init parsing, call this function to parse text */ |
| 1469 | + private Query startParsing() { |
| 1470 | + reset(); |
1369 | 1471 | return parseClause(0); |
1370 | 1472 | } |
1371 | | - |
1372 | | - /** |
| 1473 | + |
| 1474 | + /** |
1373 | 1475 | * Simple parse on one default field, no rewrites. |
1374 | 1476 | * |
1375 | 1477 | * @param queryText |
1376 | 1478 | * @return |
1377 | 1479 | */ |
1378 | | - public Query parseRaw(String queryText){ |
| 1480 | + public Query parseRaw(String queryText) { |
1379 | 1481 | queryText = extractPrefixFilter(queryText); |
1380 | | - if(queryText.trim().length()==0 && hasPrefixFilters()) |
| 1482 | + if (queryText.trim().length() == 0 && hasPrefixFilters()) |
1381 | 1483 | return new MatchAllTitlesQuery(fields.title()); |
1382 | | - queryLength = queryText.length(); |
| 1484 | + queryLength = queryText.length(); |
1383 | 1485 | text = queryText.toCharArray(); |
1384 | | - |
| 1486 | + |
1385 | 1487 | Query query = null; |
1386 | 1488 | query = startParsing(); |
1387 | | - |
1388 | | - return query; |
| 1489 | + |
| 1490 | + return query; |
1389 | 1491 | } |
1390 | 1492 | |
1391 | 1493 | /* ======================= FULL-QUERY PARSING ========================= */ |
1392 | | - |
| 1494 | + |
1393 | 1495 | public static class ParsingOptions { |
1394 | 1496 | /** use a custom namespace-transformation policy */ |
1395 | 1497 | NamespacePolicy policy = null; |
1396 | | - /** only parse the main query (on contents and title) without relevance stuff */ |
| 1498 | + /** |
| 1499 | + * only parse the main query (on contents and title) without relevance |
| 1500 | + * stuff |
| 1501 | + */ |
1397 | 1502 | boolean coreQueryOnly = false; |
1398 | 1503 | /** interface to fetch wildcard hits */ |
1399 | 1504 | Wildcards wildcards = null; |
1400 | 1505 | /** fuzzy queries interface */ |
1401 | 1506 | Fuzzy fuzzy = null; |
1402 | | - |
1403 | | - public ParsingOptions() {} |
1404 | | - public ParsingOptions(NamespacePolicy policy){ |
| 1507 | + |
| 1508 | + public ParsingOptions() { |
| 1509 | + } |
| 1510 | + |
| 1511 | + public ParsingOptions(NamespacePolicy policy) { |
1405 | 1512 | this.policy = policy; |
1406 | 1513 | } |
1407 | | - public ParsingOptions(boolean coreQueryOnly){ |
| 1514 | + |
| 1515 | + public ParsingOptions(boolean coreQueryOnly) { |
1408 | 1516 | this.coreQueryOnly = coreQueryOnly; |
1409 | 1517 | } |
1410 | | - public ParsingOptions(Wildcards wildcards){ |
| 1518 | + |
| 1519 | + public ParsingOptions(Wildcards wildcards) { |
1411 | 1520 | this.wildcards = wildcards; |
1412 | 1521 | } |
1413 | | - public ParsingOptions(NamespacePolicy policy, Wildcards wildcards, Fuzzy fuzzy){ |
| 1522 | + |
| 1523 | + public ParsingOptions(NamespacePolicy policy, Wildcards wildcards, |
| 1524 | + Fuzzy fuzzy) { |
1414 | 1525 | this.policy = policy; |
1415 | 1526 | this.wildcards = wildcards; |
1416 | 1527 | this.fuzzy = fuzzy; |
1417 | 1528 | } |
1418 | 1529 | } |
1419 | | - |
| 1530 | + |
1420 | 1531 | /** Parse a full query with default options */ |
1421 | | - public Query parse(String queryText){ |
1422 | | - return parse(queryText,new ParsingOptions()); |
| 1532 | + public Query parse(String queryText) { |
| 1533 | + return parse(queryText, new ParsingOptions()); |
1423 | 1534 | } |
1424 | | - |
| 1535 | + |
1425 | 1536 | /** |
1426 | 1537 | * Construct a full query on all the fields in the index from search text |
1427 | | - * |
1428 | 1538 | */ |
1429 | 1539 | @SuppressWarnings("unchecked") |
1430 | | - public Query parse(String queryText, ParsingOptions options){ |
| 1540 | + public Query parse(String queryText, ParsingOptions options) { |
1431 | 1541 | this.wildcards = options.wildcards; |
1432 | 1542 | this.fuzzy = options.fuzzy; |
1433 | 1543 | queryText = quoteCJK(queryText); |
1434 | 1544 | NamespacePolicy defaultPolicy = this.namespacePolicy; |
1435 | | - if(options.policy != null) |
1436 | | - this.namespacePolicy = options.policy; |
| 1545 | + if (options.policy != null) |
| 1546 | + this.namespacePolicy = options.policy; |
1437 | 1547 | defaultBoost = CONTENTS_BOOST; |
1438 | 1548 | defaultAliasBoost = ALIAS_BOOST; |
1439 | | - |
1440 | | - this.rawFields = new HashMap<String,String>(); |
| 1549 | + |
| 1550 | + this.rawFields = new HashMap<String, String>(); |
1441 | 1551 | // parse out raw queries |
1442 | | - for(String field : new String[] {"ondiscussionpage:"}){ |
| 1552 | + for (String field : new String[] { "ondiscussionpage:" }) { |
1443 | 1553 | String[] ret = extractRawField(queryText, field); |
1444 | 1554 | queryText = ret[0]; |
1445 | | - if( ret[1] != null ) |
1446 | | - this.rawFields.put(field,ret[1]); |
| 1555 | + if (ret[1] != null) |
| 1556 | + this.rawFields.put(field, ret[1]); |
1447 | 1557 | } |
1448 | | - |
1449 | | - |
1450 | | - Query qc = parseRaw(queryText); |
| 1558 | + |
| 1559 | + Query qc = parseRaw(queryText); |
1451 | 1560 | ParsedWords words = parsedWords; |
1452 | 1561 | this.namespacePolicy = defaultPolicy; |
1453 | | - if(qc == null) // empty |
| 1562 | + if (qc == null) // empty |
1454 | 1563 | return null; |
1455 | | - |
1456 | | - highlightTerms = extractHighlightTerms(qc); |
1457 | | - |
1458 | | - if(options.coreQueryOnly || words.words.size()==0) |
| 1564 | + |
| 1565 | + highlightTerms = extractHighlightTerms(qc); |
| 1566 | + |
| 1567 | + if (options.coreQueryOnly || words.words.size() == 0) |
1459 | 1568 | return qc; |
1460 | | - |
| 1569 | + |
1461 | 1570 | ParsedWords nostopWords = filterStopWords(words); |
1462 | | - |
| 1571 | + |
1463 | 1572 | // main phrase combined with relevance metrics
1464 | | - Query mainPhrase = makeMainPhraseWithRelevance(words,nostopWords); |
1465 | | - if(mainPhrase == null) |
| 1573 | + Query mainPhrase = makeMainPhraseWithRelevance(words, nostopWords); |
| 1574 | + if (mainPhrase == null) |
1466 | 1575 | return qc; |
1467 | 1576 | |
1468 | 1577 | // additional queries |
1469 | | - //Query related = new LogTransformScore(makeRelatedRelevance(words,ADD_RELATED_BOOST)); |
| 1578 | + // Query related = new |
| 1579 | + // LogTransformScore(makeRelatedRelevance(words,ADD_RELATED_BOOST)); |
1470 | 1580 | // Query related = makeRelatedRelevance(words,ADD_RELATED_BOOST); |
1471 | | - |
| 1581 | + |
1472 | 1582 | // mainphrase + related |
1473 | | - /* BooleanQuery additional = new BooleanQuery(true); |
1474 | | - additional.add(mainPhrase,Occur.MUST); |
1475 | | - if(related != null) |
1476 | | - additional.add(related,Occur.SHOULD); */ |
1477 | | - |
1478 | | - /* BooleanQuery full = new BooleanQuery(true); |
1479 | | - full.add(bq,Occur.MUST); |
1480 | | - full.add(additional,Occur.SHOULD); */ |
1481 | | - |
| 1583 | + /* |
| 1584 | + * BooleanQuery additional = new BooleanQuery(true); |
| 1585 | + * additional.add(mainPhrase,Occur.MUST); |
| 1586 | + * if(related != null) |
| 1587 | + * additional.add(related,Occur.SHOULD); |
| 1588 | + */ |
| 1589 | + |
| 1590 | + /* |
| 1591 | + * BooleanQuery full = new BooleanQuery(true); |
| 1592 | + * full.add(bq,Occur.MUST); |
| 1593 | + * full.add(additional,Occur.SHOULD); |
| 1594 | + */ |
| 1595 | + |
1482 | 1596 | // redirect match (when redirect is not contained in contents or title) |
1483 | | - Query redirectMatch = makeAlttitleForRedirectsMulti(makeFirstAndSingular(words),20,1f); |
1484 | | - |
| 1597 | + Query redirectMatch = makeAlttitleForRedirectsMulti( |
| 1598 | + makeFirstAndSingular(words), 20, 1f); |
| 1599 | + |
1485 | 1600 | BooleanQuery full = new BooleanQuery(true); |
1486 | 1601 | full.add(qc, Occur.MUST); |
1487 | | - if(mainPhrase != null) |
| 1602 | + if (mainPhrase != null) |
1488 | 1603 | full.add(mainPhrase, Occur.SHOULD); |
1489 | | - if(redirectMatch != null) |
| 1604 | + if (redirectMatch != null) |
1490 | 1605 | full.add(redirectMatch, Occur.SHOULD); |
1491 | | - |
| 1606 | + |
1492 | 1607 | // add raw fields as global constrains |
1493 | | - for(Entry<String,String> e : rawFields.entrySet()){ |
| 1608 | + for (Entry<String, String> e : rawFields.entrySet()) { |
1494 | 1609 | String field = e.getKey(); |
1495 | | - if(field.endsWith(":")) |
1496 | | - field = field.substring(0, field.length()-1); |
1497 | | - // find target field in the index, e.g. ondiscussionpage -> ThreadPage |
| 1610 | + if (field.endsWith(":")) |
| 1611 | + field = field.substring(0, field.length() - 1); |
| 1612 | + // find target field in the index, e.g. ondiscussionpage -> |
| 1613 | + // ThreadPage |
1498 | 1614 | String targetField = keywordFieldMapping.get(field); |
1499 | | - if( targetField != null) |
1500 | | - full.add(new TermQuery(new Term(targetField, e.getValue())),Occur.MUST); |
| 1615 | + if (targetField != null) |
| 1616 | + full.add(new TermQuery(new Term(targetField, e.getValue())), |
| 1617 | + Occur.MUST); |
1501 | 1618 | } |
1502 | | - |
1503 | | - // init global scaling of articles |
| 1619 | + |
| 1620 | + // init global scaling of articles |
1504 | 1621 | ArticleScaling scale = new ArticleScaling.None(); |
1505 | 1622 | // based on age |
1506 | 1623 | AgeScaling age = iid.getAgeScaling(); |
1507 | | - if(age != AgeScaling.NONE){ |
1508 | | - switch(age){ |
1509 | | - case STRONG: scale = new ArticleScaling.StepScale(0.3f,1); break; |
1510 | | - case MEDIUM: scale = new ArticleScaling.StepScale(0.6f,1); break; |
1511 | | - case WEAK: scale = new ArticleScaling.StepScale(0.9f,1); break; |
1512 | | - default: throw new RuntimeException("Unsupported age scaling "+age); |
1513 | | - } |
1514 | | - |
| 1624 | + if (age != AgeScaling.NONE) { |
| 1625 | + switch (age) { |
| 1626 | + case STRONG: |
| 1627 | + scale = new ArticleScaling.StepScale(0.3f, 1); |
| 1628 | + break; |
| 1629 | + case MEDIUM: |
| 1630 | + scale = new ArticleScaling.StepScale(0.6f, 1); |
| 1631 | + break; |
| 1632 | + case WEAK: |
| 1633 | + scale = new ArticleScaling.StepScale(0.9f, 1); |
| 1634 | + break; |
| 1635 | + default: |
| 1636 | + throw new RuntimeException("Unsupported age scaling " + age); |
| 1637 | + } |
| 1638 | + |
1515 | 1639 | } |
1516 | | - |
| 1640 | + |
1517 | 1641 | // additional rank |
1518 | | - AggregateInfo rank = iid.useAdditionalRank()? new AggregateInfoImpl() : null; |
| 1642 | + AggregateInfo rank = iid.useAdditionalRank() ? new AggregateInfoImpl() |
| 1643 | + : null; |
1519 | 1644 | ArticleNamespaceScaling nsScale = iid.getNamespaceScaling(); |
1520 | | - return new ArticleQueryWrap(full,new ArticleInfoImpl(),scale,rank,nsScale); |
1521 | | - |
| 1645 | + return new ArticleQueryWrap(full, new ArticleInfoImpl(), scale, rank, |
| 1646 | + nsScale); |
| 1647 | + |
1522 | 1648 | } |
1523 | | - |
| 1649 | + |
1524 | 1650 | /** Return terms that should be highlighted in snippets */ |
1525 | 1651 | private Term[] extractHighlightTerms(Query query) { |
1526 | 1652 | HashSet<Term> terms = new HashSet<Term>(); |
1527 | 1653 | query.extractTerms(terms); |
1528 | | - |
| 1654 | + |
1529 | 1655 | // subtract forbidden terms
1530 | 1656 | BooleanQuery forbidden = extractForbidden(query); |
1531 | | - if(forbidden != null){ |
| 1657 | + if (forbidden != null) { |
1532 | 1658 | HashSet<Term> forbiddenTerms = new HashSet<Term>(); |
1533 | 1659 | forbidden.extractTerms(forbiddenTerms); |
1534 | 1660 | terms.removeAll(forbiddenTerms); |
— | — | @@ -1536,18 +1662,19 @@ |
1537 | 1663 | } |
1538 | 1664 | |
1539 | 1665 | /** Generate singular parsed words coupled with first() words */ |
1540 | | - private ParsedWords makeFirstAndSingular(ParsedWords words){ |
| 1666 | + private ParsedWords makeFirstAndSingular(ParsedWords words) { |
1541 | 1667 | ParsedWords ret = words.cloneFirstWithWildcards(); |
1542 | | - if(filters.hasSingular()){ |
| 1668 | + if (filters.hasSingular()) { |
1543 | 1669 | Singular singular = filters.getSingular(); |
1544 | 1670 | // generate singular forms if any |
1545 | | - for(WordsDesc wd : ret.words){ |
1546 | | - if(wd.isWildcardOrFuzzy()) |
| 1671 | + for (WordsDesc wd : ret.words) { |
| 1672 | + if (wd.isWildcardOrFuzzy()) |
1547 | 1673 | continue; |
1548 | 1674 | String w = wd.first(); |
1549 | 1675 | String sw = singular.getSingular(w); |
1550 | | - if( sw!=null && !w.equals(sw) ){ |
1551 | | - wd.add( new WordBoost( sw, wd.firstWordBoost().boost * SINGULAR_WORD_BOOST ) ); |
| 1676 | + if (sw != null && !w.equals(sw)) { |
| 1677 | + wd.add(new WordBoost(sw, wd.firstWordBoost().boost |
| 1678 | + * SINGULAR_WORD_BOOST)); |
1552 | 1679 | } |
1553 | 1680 | } |
1554 | 1681 | } |
— | — | @@ -1556,7 +1683,7 @@ |
1557 | 1684 | |
1558 | 1685 | private ArrayList<String> cleanupWords(ArrayList<String> words) { |
1559 | 1686 | ArrayList<String> ret = new ArrayList<String>(); |
1560 | | - for(String w : words){ |
| 1687 | + for (String w : words) { |
1561 | 1688 | ret.add(FastWikiTokenizerEngine.clearTrailing(w)); |
1562 | 1689 | } |
1563 | 1690 | return ret; |
— | — | @@ -1564,85 +1691,89 @@ |
1565 | 1692 | |
1566 | 1693 | /** Recursively traverse queries and put stop words to SHOULD */
1567 | 1694 | private void filterStopWords(BooleanQuery bq) { |
1568 | | - if(stopWords==null && stopWords.size()==0) |
| 1695 | + if (stopWords == null && stopWords.size() == 0) |
1569 | 1696 | return; |
1570 | | - for(BooleanClause cl : bq.getClauses()){ |
| 1697 | + for (BooleanClause cl : bq.getClauses()) { |
1571 | 1698 | Query q = cl.getQuery(); |
1572 | 1699 | Occur o = cl.getOccur(); |
1573 | | - if(q instanceof BooleanQuery){ |
1574 | | - filterStopWords((BooleanQuery)q); |
1575 | | - } else if(q instanceof TermQuery && o.equals(Occur.MUST) |
1576 | | - && stopWords.contains(((TermQuery)q).getTerm().text())){ |
| 1700 | + if (q instanceof BooleanQuery) { |
| 1701 | + filterStopWords((BooleanQuery) q); |
| 1702 | + } else if (q instanceof TermQuery && o.equals(Occur.MUST) |
| 1703 | + && stopWords.contains(((TermQuery) q).getTerm().text())) { |
1577 | 1704 | cl.setOccur(Occur.SHOULD); |
1578 | 1705 | } |
1579 | 1706 | } |
1580 | 1707 | } |
1581 | | - |
| 1708 | + |
1582 | 1709 | /** @return new ParsedWords with stop words deleted */ |
1583 | | - private ParsedWords filterStopWords(ParsedWords words){ |
| 1710 | + private ParsedWords filterStopWords(ParsedWords words) { |
1584 | 1711 | // if all stop words, don't filter |
1585 | 1712 | boolean allStop = true; |
1586 | | - for(WordsDesc d : words.words){ |
1587 | | - if(!stopWords.contains(d.first())){ |
| 1713 | + for (WordsDesc d : words.words) { |
| 1714 | + if (!stopWords.contains(d.first())) { |
1588 | 1715 | allStop = false; |
1589 | 1716 | break; |
1590 | 1717 | } |
1591 | 1718 | } |
1592 | 1719 | ParsedWords ret = new ParsedWords(); |
1593 | | - for(WordsDesc d : words.words){ |
1594 | | - if(allStop || !stopWords.contains(d.first())) |
| 1720 | + for (WordsDesc d : words.words) { |
| 1721 | + if (allStop || !stopWords.contains(d.first())) |
1595 | 1722 | ret.words.add(d); |
1596 | 1723 | } |
1597 | 1724 | return ret; |
1598 | 1725 | } |
1599 | 1726 | |
1600 | 1727 | /** Quote CJK chars to avoid frequency-based analysis */ |
1601 | | - protected String quoteCJK(String queryText){ |
1602 | | - if(!builder.filters.isUsingCJK()) |
| 1728 | + protected String quoteCJK(String queryText) { |
| 1729 | + if (!builder.filters.isUsingCJK()) |
1603 | 1730 | return queryText; |
1604 | | - |
| 1731 | + |
1605 | 1732 | StringBuilder sb = new StringBuilder(); |
1606 | 1733 | int c; |
1607 | 1734 | boolean prevCJK = false; |
1608 | 1735 | int offset = 0; |
1609 | 1736 | boolean closeQuote = false; |
1610 | 1737 | boolean inQuotes = false; |
1611 | | - for(int i=0;i<queryText.length();i++){ |
| 1738 | + for (int i = 0; i < queryText.length(); i++) { |
1612 | 1739 | c = queryText.codePointAt(i); |
1613 | | - if(c == '"') inQuotes = !inQuotes; |
1614 | | - if(inQuotes) |
| 1740 | + if (c == '"') |
| 1741 | + inQuotes = !inQuotes; |
| 1742 | + if (inQuotes) |
1615 | 1743 | continue; |
1616 | | - if(CJKFilter.isCJKChar(c)){ |
1617 | | - if(!prevCJK){ // begin of CJK stream |
1618 | | - if(i!=0) |
1619 | | - sb.append(queryText.substring(offset,i)); |
| 1744 | + if (CJKFilter.isCJKChar(c)) { |
| 1745 | + if (!prevCJK) { // beginning of CJK stream
| 1746 | + if (i != 0) |
| 1747 | + sb.append(queryText.substring(offset, i)); |
1620 | 1748 | offset = i; |
1621 | 1749 | sb.append('"'); |
1622 | 1750 | closeQuote = true; |
1623 | 1751 | prevCJK = true; |
1624 | 1752 | } |
1625 | | - } else if(prevCJK){ |
| 1753 | + } else if (prevCJK) { |
1626 | 1754 | // end of CJK stream |
1627 | | - sb.append(queryText.substring(offset,i)); |
| 1755 | + sb.append(queryText.substring(offset, i)); |
1628 | 1756 | offset = i; |
1629 | 1757 | sb.append('"'); |
1630 | 1758 | closeQuote = true; |
1631 | 1759 | prevCJK = false; |
1632 | 1760 | } |
1633 | 1761 | } |
1634 | | - if(offset == 0 && !closeQuote) |
| 1762 | + if (offset == 0 && !closeQuote) |
1635 | 1763 | return queryText; |
1636 | | - else{ |
1637 | | - sb.append(queryText.substring(offset,queryText.length())); |
1638 | | - if(closeQuote) |
| 1764 | + else { |
| 1765 | + sb.append(queryText.substring(offset, queryText.length())); |
| 1766 | + if (closeQuote) |
1639 | 1767 | sb.append('"'); |
1640 | 1768 | return sb.toString(); |
1641 | 1769 | } |
1642 | 1770 | } |
1643 | | - |
1644 | | - /** Make title query in format: title:query stemtitle:stemmedquery |
1645 | | - * Also extract words from query (to be used for phrases additional scores) |
1646 | | - * @return query */ |
| 1771 | + |
| 1772 | + /** |
| 1773 | + * Make title query in format: title:query stemtitle:stemmedquery |
| 1774 | + * Also extract words from query (to be used for phrases additional scores) |
| 1775 | + * |
| 1776 | + * @return query |
| 1777 | + */ |
1647 | 1778 | protected Query makeTitlePart(String queryText) { |
1648 | 1779 | // push on stack |
1649 | 1780 | String contentField = defaultField; |
— | — | @@ -1650,437 +1781,488 @@ |
1651 | 1782 | |
1652 | 1783 | // stemmed title |
1653 | 1784 | Query qs = null; |
1654 | | - if(ADD_STEM_TITLE && builder.getFilters().hasStemmer()){ |
1655 | | - defaultField = fields.stemtitle(); |
| 1785 | + if (ADD_STEM_TITLE && builder.getFilters().hasStemmer()) { |
| 1786 | + defaultField = fields.stemtitle(); |
1656 | 1787 | defaultBoost = STEM_TITLE_BOOST; |
1657 | 1788 | defaultAliasBoost = STEM_TITLE_ALIAS_BOOST; |
1658 | 1789 | qs = parseRaw(queryText); |
1659 | 1790 | } |
1660 | 1791 | // title |
1661 | | - defaultField = fields.title(); |
1662 | | - defaultBoost = (qs!= null)? TITLE_BOOST : TITLE_BOOST+STEM_TITLE_BOOST; |
1663 | | - defaultAliasBoost = TITLE_ALIAS_BOOST; |
| 1792 | + defaultField = fields.title(); |
| 1793 | + defaultBoost = (qs != null) ? TITLE_BOOST : TITLE_BOOST |
| 1794 | + + STEM_TITLE_BOOST; |
| 1795 | + defaultAliasBoost = TITLE_ALIAS_BOOST; |
1664 | 1796 | Query qt = parseRaw(queryText); |
1665 | | - |
| 1797 | + |
1666 | 1798 | // pop stack |
1667 | 1799 | defaultField = contentField; |
1668 | 1800 | defaultBoost = olfDefaultBoost; |
1669 | 1801 | defaultAliasBoost = ALIAS_BOOST; |
1670 | 1802 | |
1671 | | - |
1672 | | - if(qt==qs || qt.equals(qs)) // either null, or category query |
| 1803 | + if (qt == qs || qt.equals(qs)) // either null, or category query |
1673 | 1804 | return qt; |
1674 | | - if(qt == null) |
| 1805 | + if (qt == null) |
1675 | 1806 | return qs; |
1676 | | - if(qs == null) |
| 1807 | + if (qs == null) |
1677 | 1808 | return qt; |
1678 | 1809 | BooleanQuery bq = new BooleanQuery(true); |
1679 | | - bq.add(qt,Occur.SHOULD); |
1680 | | - bq.add(qs,Occur.SHOULD); |
| 1810 | + bq.add(qt, Occur.SHOULD); |
| 1811 | + bq.add(qs, Occur.SHOULD); |
1681 | 1812 | return bq; |
1682 | 1813 | } |
1683 | | - |
| 1814 | + |
1684 | 1815 | /** Extract MUST_NOT clauses form a query */ |
1685 | | - protected static BooleanQuery extractForbidden(Query q){ |
| 1816 | + protected static BooleanQuery extractForbidden(Query q) { |
1686 | 1817 | BooleanQuery bq = new BooleanQuery(); |
1687 | | - extractForbiddenRecursive(bq,q); |
1688 | | - if(bq.getClauses().length == 0) |
| 1818 | + extractForbiddenRecursive(bq, q); |
| 1819 | + if (bq.getClauses().length == 0) |
1689 | 1820 | return null; |
1690 | | - |
| 1821 | + |
1691 | 1822 | return bq; |
1692 | 1823 | } |
1693 | | - /** Recursivily extract all MUST_NOT clauses from query */ |
1694 | | - protected static void extractForbiddenRecursive(BooleanQuery forbidden, Query q){ |
1695 | | - if(q instanceof BooleanQuery){ |
1696 | | - BooleanQuery bq = (BooleanQuery)q; |
1697 | | - for(BooleanClause cl : bq.getClauses()){ |
1698 | | - if(cl.getOccur() == Occur.MUST_NOT) |
1699 | | - forbidden.add(cl.getQuery(),Occur.SHOULD); |
| 1824 | + |
| 1825 | + /** Recursively extract all MUST_NOT clauses from query */
| 1826 | + protected static void extractForbiddenRecursive(BooleanQuery forbidden, |
| 1827 | + Query q) { |
| 1828 | + if (q instanceof BooleanQuery) { |
| 1829 | + BooleanQuery bq = (BooleanQuery) q; |
| 1830 | + for (BooleanClause cl : bq.getClauses()) { |
| 1831 | + if (cl.getOccur() == Occur.MUST_NOT) |
| 1832 | + forbidden.add(cl.getQuery(), Occur.SHOULD); |
1700 | 1833 | else |
1701 | | - extractForbiddenRecursive(forbidden,cl.getQuery()); |
| 1834 | + extractForbiddenRecursive(forbidden, cl.getQuery()); |
1702 | 1835 | } |
1703 | 1836 | } |
1704 | 1837 | } |
1705 | | - /** Extract forbidden terms from a query into a hashset */ |
1706 | | - public static void extractForbiddenInto(Query q, HashSet<Term> forbidden){ |
| 1838 | + |
| 1839 | + /** Extract forbidden terms from a query into a hashset */ |
| 1840 | + public static void extractForbiddenInto(Query q, HashSet<Term> forbidden) { |
1707 | 1841 | BooleanQuery bq = extractForbidden(q); |
1708 | | - if(bq != null) |
| 1842 | + if (bq != null) |
1709 | 1843 | bq.extractTerms(forbidden); |
1710 | 1844 | } |
1711 | | - |
| 1845 | + |
1712 | 1846 | /** Valid after parse(), returns if the last query had phrases in it */ |
1713 | | - public boolean hasPhrases(){ |
1714 | | - for(WordsDesc wd : parsedWords.words){ |
1715 | | - if(wd.type == ExpandedType.PHRASE) |
| 1847 | + public boolean hasPhrases() { |
| 1848 | + for (WordsDesc wd : parsedWords.words) { |
| 1849 | + if (wd.type == ExpandedType.PHRASE) |
1716 | 1850 | return true; |
1717 | 1851 | } |
1718 | 1852 | return false; |
1719 | 1853 | } |
1720 | | - |
1721 | | - /** Make the main phrases with relevance metrics */ |
1722 | | - protected Query makeMainPhraseWithRelevance(ParsedWords words, ParsedWords noStopWords){ |
| 1854 | + |
| 1855 | + /** Make the main phrases with relevance metrics */ |
| 1856 | + protected Query makeMainPhraseWithRelevance(ParsedWords words, |
| 1857 | + ParsedWords noStopWords) { |
1723 | 1858 | Query main = null; |
1724 | 1859 | String field = fields.contents(); // put to begin() for performance |
1725 | | - |
| 1860 | + |
1726 | 1861 | // all words as entered into the query |
1727 | | - Query phrase = makePositionalMulti(noStopWords,field,new PositionalOptions.Sloppy(),MAINPHRASE_SLOP,1); |
1728 | | - |
1729 | | - Query sections = makeSectionsQuery(noStopWords,SECTIONS_BOOST); |
| 1862 | + Query phrase = makePositionalMulti(noStopWords, field, |
| 1863 | + new PositionalOptions.Sloppy(), MAINPHRASE_SLOP, 1); |
| 1864 | + |
| 1865 | + Query sections = makeSectionsQuery(noStopWords, SECTIONS_BOOST); |
1730 | 1866 | // wordnet synonyms |
1731 | | - ArrayList<ArrayList<String>> wordnet = WordNet.replaceOne(words.extractFirst(),iid.getLangCode()); |
1732 | | - |
| 1867 | + ArrayList<ArrayList<String>> wordnet = WordNet.replaceOne( |
| 1868 | + words.extractFirst(), iid.getLangCode()); |
| 1869 | + |
1733 | 1870 | BooleanQuery combined = new BooleanQuery(true); |
1734 | | - // combined various queries into mainphrase |
1735 | | - if(phrase != null){ |
1736 | | - combined.add(phrase,Occur.SHOULD); |
1737 | | - // wordnet |
1738 | | - if(wordnet != null){ |
1739 | | - for(ArrayList<String> wnwords : wordnet){ |
1740 | | - if(!allStopWords(wnwords)) |
1741 | | - combined.add(makePositional(wnwords,field,new PositionalOptions.Sloppy(),MAINPHRASE_SLOP,1),Occur.SHOULD); |
| 1871 | + // combine various queries into mainphrase
| 1872 | + if (phrase != null) { |
| 1873 | + combined.add(phrase, Occur.SHOULD); |
| 1874 | + // wordnet |
| 1875 | + if (wordnet != null) { |
| 1876 | + for (ArrayList<String> wnwords : wordnet) { |
| 1877 | + if (!allStopWords(wnwords)) |
| 1878 | + combined.add( |
| 1879 | + makePositional(wnwords, field, |
| 1880 | + new PositionalOptions.Sloppy(), |
| 1881 | + MAINPHRASE_SLOP, 1), Occur.SHOULD); |
1742 | 1882 | } |
1743 | 1883 | } |
1744 | 1884 | // urls |
1745 | | - if(urls.size() > 0){ |
1746 | | - for(ArrayList<Term> terms : urls){ |
1747 | | - combined.add(makePositional(extractTermText(terms), extractField(terms), new PositionalOptions.Sloppy(),0,1), Occur.SHOULD); |
| 1885 | + if (urls.size() > 0) { |
| 1886 | + for (ArrayList<Term> terms : urls) { |
| 1887 | + combined.add( |
| 1888 | + makePositional(extractTermText(terms), |
| 1889 | + extractField(terms), |
| 1890 | + new PositionalOptions.Sloppy(), 0, 1), |
| 1891 | + Occur.SHOULD); |
1748 | 1892 | } |
1749 | 1893 | } |
1750 | 1894 | } |
1751 | | - if(sections!=null) |
1752 | | - combined.add(sections,Occur.SHOULD); |
1753 | | - |
1754 | | - if(combined.getClauses().length == 1) |
| 1895 | + if (sections != null) |
| 1896 | + combined.add(sections, Occur.SHOULD); |
| 1897 | + |
| 1898 | + if (combined.getClauses().length == 1) |
1755 | 1899 | main = combined.getClauses()[0].getQuery(); |
1756 | 1900 | else |
1757 | 1901 | main = combined; |
1758 | | - |
1759 | | - |
| 1902 | + |
1760 | 1903 | main.setBoost(MAINPHRASE_BOOST); |
1761 | | - |
| 1904 | + |
1762 | 1905 | // relevance: alttitle |
1763 | | - Query alttitle = makeAlttitleRelevance(words,RELEVANCE_ALTTITLE_BOOST); |
| 1906 | + Query alttitle = makeAlttitleRelevance(words, RELEVANCE_ALTTITLE_BOOST); |
1764 | 1907 | ArrayList<Query> altAdd = new ArrayList<Query>(); |
1765 | | - if(wordnet!=null) |
1766 | | - for(ArrayList<String> wnwords : wordnet) |
1767 | | - if(!allStopWords(wnwords)) |
1768 | | - altAdd.add(makeAlttitleRelevance(wnwords,RELEVANCE_ALTTITLE_BOOST)); |
1769 | | - alttitle = simplify(combine(alttitle,altAdd)); |
1770 | | - |
| 1908 | + if (wordnet != null) |
| 1909 | + for (ArrayList<String> wnwords : wordnet) |
| 1910 | + if (!allStopWords(wnwords)) |
| 1911 | + altAdd.add(makeAlttitleRelevance(wnwords, |
| 1912 | + RELEVANCE_ALTTITLE_BOOST)); |
| 1913 | + alttitle = simplify(combine(alttitle, altAdd)); |
| 1914 | + |
1771 | 1915 | // relevance: related |
1772 | | - Query related = makeRelatedRelevance(words,RELEVANCE_RELATED_BOOST); |
| 1916 | + Query related = makeRelatedRelevance(words, RELEVANCE_RELATED_BOOST); |
1773 | 1917 | ArrayList<Query> relAdd = new ArrayList<Query>(); |
1774 | | - if(wordnet!=null) |
1775 | | - for(ArrayList<String> wnwords : wordnet) |
1776 | | - if(!allStopWords(wnwords)) |
1777 | | - relAdd.add(makeRelatedRelevance(wnwords,RELEVANCE_RELATED_BOOST)); |
1778 | | - related = simplify(combine(related,relAdd)); |
1779 | | - |
| 1918 | + if (wordnet != null) |
| 1919 | + for (ArrayList<String> wnwords : wordnet) |
| 1920 | + if (!allStopWords(wnwords)) |
| 1921 | + relAdd.add(makeRelatedRelevance(wnwords, |
| 1922 | + RELEVANCE_RELATED_BOOST)); |
| 1923 | + related = simplify(combine(related, relAdd)); |
| 1924 | + |
1780 | 1925 | BooleanQuery relevances = new BooleanQuery(true); |
1781 | | - relevances.add(alttitle,Occur.SHOULD); |
1782 | | - relevances.add(related,Occur.SHOULD); |
1783 | | - |
| 1926 | + relevances.add(alttitle, Occur.SHOULD); |
| 1927 | + relevances.add(related, Occur.SHOULD); |
| 1928 | + |
1784 | 1929 | RelevanceQuery whole = new RelevanceQuery(main); |
1785 | 1930 | whole.addRelevanceMeasure(relevances); |
1786 | | - |
| 1931 | + |
1787 | 1932 | return whole; |
1788 | 1933 | } |
1789 | | - |
| 1934 | + |
1790 | 1935 | private String extractField(ArrayList<Term> terms) { |
1791 | | - if(terms.size() > 0) |
| 1936 | + if (terms.size() > 0) |
1792 | 1937 | return terms.get(0).field(); |
1793 | 1938 | else |
1794 | | - throw new RuntimeException("Trying to extract field from zero-length list of terms"); |
| 1939 | + throw new RuntimeException( |
| 1940 | + "Trying to extract field from zero-length list of terms"); |
1795 | 1941 | } |
1796 | 1942 | |
1797 | 1943 | private ArrayList<String> extractTermText(ArrayList<Term> terms) { |
1798 | 1944 | ArrayList<String> tt = new ArrayList<String>(); |
1799 | | - for(Term t : terms) |
| 1945 | + for (Term t : terms) |
1800 | 1946 | tt.add(t.text()); |
1801 | 1947 | return tt; |
1802 | 1948 | } |
1803 | 1949 | |
1804 | | - /** Combine one main query with a number of other queries into a boolean query */ |
| 1950 | + /** |
| 1951 | + * Combine one main query with a number of other queries into a boolean |
| 1952 | + * query |
| 1953 | + */ |
1805 | 1954 | private Query combine(Query query, ArrayList<Query> additional) { |
1806 | | - if(additional.size()==0) |
| 1955 | + if (additional.size() == 0) |
1807 | 1956 | return query; |
1808 | 1957 | BooleanQuery bq = new BooleanQuery(true); |
1809 | | - bq.add(query,Occur.SHOULD); |
1810 | | - for(Query q : additional){ |
1811 | | - if(q != null) |
1812 | | - bq.add(q,Occur.SHOULD); |
| 1958 | + bq.add(query, Occur.SHOULD); |
| 1959 | + for (Query q : additional) { |
| 1960 | + if (q != null) |
| 1961 | + bq.add(q, Occur.SHOULD); |
1813 | 1962 | } |
1814 | | - if(bq.clauses().size()==1) |
| 1963 | + if (bq.clauses().size() == 1) |
1815 | 1964 | return query; |
1816 | 1965 | return bq; |
1817 | | - } |
1818 | | - |
| 1966 | + } |
| 1967 | + |
1819 | 1968 | /** Convert multiple OR-like queries into one with larger boost */ |
1820 | | - protected Query simplify(Query q){ |
1821 | | - if(q instanceof BooleanQuery){ |
1822 | | - BooleanQuery bq = (BooleanQuery)q; |
1823 | | - if(!allShould(bq)) |
| 1969 | + protected Query simplify(Query q) { |
| 1970 | + if (q instanceof BooleanQuery) { |
| 1971 | + BooleanQuery bq = (BooleanQuery) q; |
| 1972 | + if (!allShould(bq)) |
1824 | 1973 | return q; |
1825 | 1974 | // query -> boost |
1826 | | - HashMap<Query,Float> map = new HashMap<Query,Float>(); |
1827 | | - extractAndSimplify(bq,map,1); |
1828 | | - |
| 1975 | + HashMap<Query, Float> map = new HashMap<Query, Float>(); |
| 1976 | + extractAndSimplify(bq, map, 1); |
| 1977 | + |
1829 | 1978 | // simplify |
1830 | 1979 | BooleanQuery ret = new BooleanQuery(true); |
1831 | | - for(Entry<Query,Float> e : map.entrySet()){ |
| 1980 | + for (Entry<Query, Float> e : map.entrySet()) { |
1832 | 1981 | Query qt = (Query) e.getKey(); |
1833 | 1982 | qt.setBoost(e.getValue()); |
1834 | | - ret.add(qt,Occur.SHOULD); |
| 1983 | + ret.add(qt, Occur.SHOULD); |
1835 | 1984 | } |
1836 | 1985 | return ret; |
1837 | 1986 | } |
1838 | 1987 | return q; |
1839 | 1988 | } |
1840 | | - |
1841 | | - private boolean allShould(BooleanQuery bq){ |
1842 | | - for(BooleanClause cl : bq.getClauses()){ |
1843 | | - if(!cl.getOccur().equals(Occur.SHOULD)) |
| 1989 | + |
| 1990 | + private boolean allShould(BooleanQuery bq) { |
| 1991 | + for (BooleanClause cl : bq.getClauses()) { |
| 1992 | + if (!cl.getOccur().equals(Occur.SHOULD)) |
1844 | 1993 | return false; |
1845 | | - if(cl.getQuery() instanceof BooleanQuery){ |
1846 | | - if(!allShould((BooleanQuery)cl.getQuery())) |
| 1994 | + if (cl.getQuery() instanceof BooleanQuery) { |
| 1995 | + if (!allShould((BooleanQuery) cl.getQuery())) |
1847 | 1996 | return false; |
1848 | 1997 | } |
1849 | 1998 | } |
1850 | 1999 | return true; |
1851 | 2000 | } |
1852 | | - |
1853 | | - private void extractAndSimplify(BooleanQuery bq, HashMap<Query,Float> map, float parentBoost){ |
1854 | | - for(BooleanClause cl : bq.getClauses()){ |
| 2001 | + |
| 2002 | + private void extractAndSimplify(BooleanQuery bq, HashMap<Query, Float> map, |
| 2003 | + float parentBoost) { |
| 2004 | + for (BooleanClause cl : bq.getClauses()) { |
1855 | 2005 | Query q = cl.getQuery(); |
1856 | | - if(q instanceof BooleanQuery) |
1857 | | - extractAndSimplify((BooleanQuery)q,map,parentBoost*bq.getBoost()); |
1858 | | - else{ |
| 2006 | + if (q instanceof BooleanQuery) |
| 2007 | + extractAndSimplify((BooleanQuery) q, map, |
| 2008 | + parentBoost * bq.getBoost()); |
| 2009 | + else { |
1859 | 2010 | Float boost = map.get(q); |
1860 | | - float b = boost==null? 0 : boost; |
1861 | | - b += q.getBoost()*bq.getBoost()*parentBoost; |
1862 | | - map.put(q,b); |
| 2011 | + float b = boost == null ? 0 : boost; |
| 2012 | + b += q.getBoost() * bq.getBoost() * parentBoost; |
| 2013 | + map.put(q, b); |
1863 | 2014 | } |
1864 | 2015 | } |
1865 | 2016 | } |
1866 | | - |
| 2017 | + |
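The simplify()/extractAndSimplify() pair above flattens a tree of SHOULD-only BooleanQuery clauses into a single level: boosts are multiplied down each nesting path and summed per leaf query. A minimal sketch of that arithmetic, using the Lucene 2.x-era API already imported by this file (the field name and term are invented for illustration; this is not part of the changeset):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class SimplifySketch {
    public static void main(String[] args) {
        TermQuery rain = new TermQuery(new Term("contents", "rain"));

        BooleanQuery inner = new BooleanQuery(true); // coord disabled, as in the parser
        inner.setBoost(2f);
        inner.add(rain, Occur.SHOULD);               // path boost: 1 * 2 = 2

        BooleanQuery outer = new BooleanQuery(true);
        outer.add(rain, Occur.SHOULD);               // path boost: 1
        outer.add(inner, Occur.SHOULD);

        // simplify(outer) would keep a single SHOULD clause for "rain"
        // whose boost is the sum over paths: 1 + 2 = 3.
    }
}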
1867 | 2018 | /** Make positional query by including all of the stop words */ |
1868 | | - protected PositionalQuery makePositional(ArrayList<String> words, String field, PositionalOptions options, int slop, float boost){ |
1869 | | - return makePositional(words,field,options,slop,boost,true); |
| 2019 | + protected PositionalQuery makePositional(ArrayList<String> words, |
| 2020 | + String field, PositionalOptions options, int slop, float boost) { |
| 2021 | + return makePositional(words, field, options, slop, boost, true); |
1870 | 2022 | } |
1871 | | - |
| 2023 | + |
1872 | 2024 | /** Make generic positional query */ |
1873 | | - protected PositionalQuery makePositional(ArrayList<String> words, String field, PositionalOptions options, int slop, float boost, boolean includeStopWords){ |
| 2025 | + protected PositionalQuery makePositional(ArrayList<String> words, |
| 2026 | + String field, PositionalOptions options, int slop, float boost, |
| 2027 | + boolean includeStopWords) { |
1874 | 2028 | PositionalQuery pq = new PositionalQuery(options); |
1875 | 2029 | int pos = 0; |
1876 | | - for(String w : words){ |
| 2030 | + for (String w : words) { |
1877 | 2031 | boolean isStop = stopWords.contains(w); |
1878 | | - if(!(isStop && !includeStopWords)) |
1879 | | - pq.add(new Term(field,w),pos,isStop); |
| 2032 | + if (!(isStop && !includeStopWords)) |
| 2033 | + pq.add(new Term(field, w), pos, isStop); |
1880 | 2034 | pos++; |
1881 | 2035 | } |
1882 | | - if(slop != 0) |
| 2036 | + if (slop != 0) |
1883 | 2037 | pq.setSlop(slop); |
1884 | 2038 | pq.setBoost(boost); |
1885 | | - if(pq.getPositions().length > 0) |
| 2039 | + if (pq.getPositions().length > 0) |
1886 | 2040 | return pq; |
1887 | | - else return null; |
| 2041 | + else |
| 2042 | + return null; |
1888 | 2043 | } |
1889 | | - |
1890 | | - protected Query makePositionalMulti(ParsedWords parsed, String field, PositionalOptions options, int slop, float boost){ |
| 2044 | + |
| 2045 | + protected Query makePositionalMulti(ParsedWords parsed, String field, |
| 2046 | + PositionalOptions options, int slop, float boost) { |
1891 | 2047 | PositionalMultiQuery mq = new PositionalMultiQuery(options); |
1892 | | - for(WordsDesc wd : parsed.words){ |
1893 | | - mq.addWithBoost(wd.getTerms(field),wd.getPosition(),wd.getBoosts()); |
| 2048 | + for (WordsDesc wd : parsed.words) { |
| 2049 | + mq.addWithBoost(wd.getTerms(field), wd.getPosition(), |
| 2050 | + wd.getBoosts()); |
1894 | 2051 | } |
1895 | 2052 | mq.setSlop(slop); |
1896 | 2053 | mq.setBoost(boost); |
1897 | | - if(mq.getPositions().length > 0) |
| 2054 | + if (mq.getPositions().length > 0) |
1898 | 2055 | return mq; |
1899 | | - else |
| 2056 | + else |
1900 | 2057 | return null; |
1901 | 2058 | } |
1902 | 2059 | |
1903 | 2060 | /** Make query with short subphrases anchored in non-stop words */ |
1904 | | - protected Query makeAnchoredQuery(ArrayList<String> words, String field, |
1905 | | - PositionalOptions options, PositionalOptions whole, PositionalOptions wholeSloppy, |
1906 | | - float boost, int slop){ |
| 2061 | + protected Query makeAnchoredQuery(ArrayList<String> words, String field, |
| 2062 | + PositionalOptions options, PositionalOptions whole, |
| 2063 | + PositionalOptions wholeSloppy, float boost, int slop) { |
1907 | 2064 | BooleanQuery bq = new BooleanQuery(true); |
1908 | | - if(words.size() == 1){ |
1909 | | - PositionalQuery pq = makePositional(words,field,options,0,1f); |
1910 | | - bq.add(pq,Occur.SHOULD); |
1911 | | - } else{ |
| 2065 | + if (words.size() == 1) { |
| 2066 | + PositionalQuery pq = makePositional(words, field, options, 0, 1f); |
| 2067 | + bq.add(pq, Occur.SHOULD); |
| 2068 | + } else { |
1912 | 2069 | // add words |
1913 | | - for(String w : words){ |
| 2070 | + for (String w : words) { |
1914 | 2071 | PositionalQuery pq = new PositionalQuery(options); |
1915 | | - pq.add(new Term(field,w)); |
1916 | | - bq.add(pq,Occur.SHOULD); |
| 2072 | + pq.add(new Term(field, w)); |
| 2073 | + bq.add(pq, Occur.SHOULD); |
1917 | 2074 | } |
1918 | 2075 | // phrases |
1919 | | - int i =0; |
| 2076 | + int i = 0; |
1920 | 2077 | ArrayList<String> phrase = new ArrayList<String>(); |
1921 | | - while(i < words.size()){ |
| 2078 | + while (i < words.size()) { |
1922 | 2079 | phrase.clear(); |
1923 | | - for(;i<words.size();i++){ |
| 2080 | + for (; i < words.size(); i++) { |
1924 | 2081 | String w = words.get(i); |
1925 | | - if(phrase.size() == 0 || stopWords.contains(w)) |
| 2082 | + if (phrase.size() == 0 || stopWords.contains(w)) |
1926 | 2083 | phrase.add(w); |
1927 | | - else{ |
1928 | | - phrase.add(w); |
| 2084 | + else { |
| 2085 | + phrase.add(w); |
1929 | 2086 | break; |
1930 | 2087 | } |
1931 | 2088 | } |
1932 | | - if(phrase.size() > 1) |
1933 | | - bq.add(makePositional(phrase,field,options,0,phrase.size()),Occur.SHOULD); |
| 2089 | + if (phrase.size() > 1) |
| 2090 | + bq.add(makePositional(phrase, field, options, 0, |
| 2091 | + phrase.size()), Occur.SHOULD); |
1934 | 2092 | } |
1935 | 2093 | } |
1936 | 2094 | // add the whole-only query |
1937 | | - if(whole != null) |
1938 | | - bq.add(makePositional(words,field,whole,slop,1),Occur.SHOULD); |
1939 | | - if(wholeSloppy != null){ |
1940 | | - Query ws = makePositional(words,field,wholeSloppy,slop,1,false); |
1941 | | - if(ws != null) |
1942 | | - bq.add(ws,Occur.SHOULD); |
| 2095 | + if (whole != null) |
| 2096 | + bq.add(makePositional(words, field, whole, slop, 1), Occur.SHOULD); |
| 2097 | + if (wholeSloppy != null) { |
| 2098 | + Query ws = makePositional(words, field, wholeSloppy, slop, 1, false); |
| 2099 | + if (ws != null) |
| 2100 | + bq.add(ws, Occur.SHOULD); |
1943 | 2101 | } |
1944 | 2102 | bq.setBoost(boost); |
1945 | | - |
| 2103 | + |
1946 | 2104 | return bq; |
1947 | 2105 | } |
1948 | | - |
| 2106 | + |
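The phrase loop in makeAnchoredQuery() above groups the input into short subphrases that start at a word, swallow any following stop words, and end at the next non-stop word, boosting each subphrase by its length; because the loop breaks before advancing, the anchoring non-stop word also starts the next subphrase. A standalone re-implementation of just that grouping rule, with an assumed stop-word set (illustration only, not part of the changeset):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class AnchoredPhraseSketch {
    // assumed stop words for the example
    static final Set<String> STOP = new HashSet<String>(Arrays.asList("the", "in"));

    public static void main(String[] args) {
        List<String> words = Arrays.asList("the", "rain", "in", "spain", "falls");
        int i = 0;
        while (i < words.size()) {
            ArrayList<String> phrase = new ArrayList<String>();
            // same grouping as the inner for-loop in makeAnchoredQuery()
            for (; i < words.size(); i++) {
                String w = words.get(i);
                if (phrase.size() == 0 || STOP.contains(w))
                    phrase.add(w);
                else {
                    phrase.add(w);
                    break;
                }
            }
            if (phrase.size() > 1)
                System.out.println(phrase + " boost=" + phrase.size());
        }
        // prints:
        //   [the, rain] boost=2
        //   [rain, in, spain] boost=3
        //   [spain, falls] boost=2
    }
}

Single leftover words are skipped here, matching the phrase.size() > 1 check above, since every individual word already gets its own SHOULD clause earlier in the method.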
1949 | 2107 | /** Make query with short subphrases anchored in non-stop words */ |
1950 | | - protected Query makeAnchoredQueryMulti(ParsedWords words, String field, |
1951 | | - PositionalOptions options, PositionalOptions whole, int slopWhole, float boost){ |
| 2108 | + protected Query makeAnchoredQueryMulti(ParsedWords words, String field, |
| 2109 | + PositionalOptions options, PositionalOptions whole, int slopWhole, |
| 2110 | + float boost) { |
1952 | 2111 | BooleanQuery bq = new BooleanQuery(true); |
1953 | 2112 | // for one word will make whole only |
1954 | | - if(words.size() >= 2){ |
| 2113 | + if (words.size() >= 2) { |
1955 | 2114 | // add single words |
1956 | | - for(int i=0;i<words.size();i++){ |
1957 | | - if(!stopWords.contains(words.firstAt(i))) // skip single stop words |
1958 | | - bq.add(makePositionalMulti(words.cloneSingleWord(i),field,options,0,1),Occur.SHOULD); |
| 2115 | + for (int i = 0; i < words.size(); i++) { |
| 2116 | + if (!stopWords.contains(words.firstAt(i))) // skip single stop |
| 2117 | + // words |
| 2118 | + bq.add(makePositionalMulti(words.cloneSingleWord(i), field, |
| 2119 | + options, 0, 1), Occur.SHOULD); |
1959 | 2120 | } |
1960 | 2121 | // add two words to score higher two-word correlations |
1961 | | - if(words.size() >= 3){ |
1962 | | - for(int i=0;i<words.size()-1;){ |
| 2122 | + if (words.size() >= 3) { |
| 2123 | + for (int i = 0; i < words.size() - 1;) { |
1963 | 2124 | int i1 = i; // first word |
1964 | 2125 | int i2 = i1 + 1; // second non-stop word |
1965 | | - for(; i2<words.size()-1; i2++){ |
1966 | | - if(!stopWords.contains(words.firstAt(i2))) |
1967 | | - break; |
| 2126 | + for (; i2 < words.size() - 1; i2++) { |
| 2127 | + if (!stopWords.contains(words.firstAt(i2))) |
| 2128 | + break; |
1968 | 2129 | } |
1969 | | - bq.add(makePositionalMulti(words.cloneRange(i1,i2),field,options,10,2),Occur.SHOULD); |
| 2130 | + bq.add(makePositionalMulti(words.cloneRange(i1, i2), field, |
| 2131 | + options, 10, 2), Occur.SHOULD); |
1970 | 2132 | i = i2; |
1971 | 2133 | } |
1972 | | - } |
| 2134 | + } |
1973 | 2135 | } |
1974 | 2136 | // add the whole-only query |
1975 | | - if(whole != null) |
1976 | | - bq.add(makePositionalMulti(words,field,whole,slopWhole,1),Occur.SHOULD); |
1977 | | - |
| 2137 | + if (whole != null) |
| 2138 | + bq.add(makePositionalMulti(words, field, whole, slopWhole, 1), |
| 2139 | + Occur.SHOULD); |
| 2140 | + |
1978 | 2141 | bq.setBoost(boost); |
1979 | | - |
| 2142 | + |
1980 | 2143 | return bq; |
1981 | 2144 | } |
1982 | | - |
| 2145 | + |
1983 | 2146 | /** Query for section headings */ |
1984 | | - protected Query makeSectionsQuery(ParsedWords words, float boost){ |
1985 | | - return makeAnchoredQueryMulti(words,fields.sections(),new PositionalOptions.Sections(),new PositionalOptions.SectionsWhole(),0,boost); |
| 2147 | + protected Query makeSectionsQuery(ParsedWords words, float boost) { |
| 2148 | + return makeAnchoredQueryMulti(words, fields.sections(), |
| 2149 | + new PositionalOptions.Sections(), |
| 2150 | + new PositionalOptions.SectionsWhole(), 0, boost); |
1986 | 2151 | } |
1987 | | - |
| 2152 | + |
1988 | 2153 | /** Relevance metrics based on rank (of titles and redirects) */ |
1989 | | - protected Query makeAlttitleRelevance(ParsedWords words, float boost){ |
1990 | | - return makeAnchoredQueryMulti(words,fields.alttitle(),new PositionalOptions.Alttitle(),new PositionalOptions.AlttitleWholeSloppy(),20,boost); |
| 2154 | + protected Query makeAlttitleRelevance(ParsedWords words, float boost) { |
| 2155 | + return makeAnchoredQueryMulti(words, fields.alttitle(), |
| 2156 | + new PositionalOptions.Alttitle(), |
| 2157 | + new PositionalOptions.AlttitleWholeSloppy(), 20, boost); |
1991 | 2158 | } |
1992 | | - |
| 2159 | + |
1993 | 2160 | /** Make relevance metrics based on context via related articles */ |
1994 | | - protected Query makeRelatedRelevance(ParsedWords words, float boost){ |
1995 | | - return makeAnchoredQueryMulti(words,fields.related(),new PositionalOptions.Related(),new PositionalOptions.RelatedWhole(),0,boost); |
| 2161 | + protected Query makeRelatedRelevance(ParsedWords words, float boost) { |
| 2162 | + return makeAnchoredQueryMulti(words, fields.related(), |
| 2163 | + new PositionalOptions.Related(), |
| 2164 | + new PositionalOptions.RelatedWhole(), 0, boost); |
1996 | 2165 | } |
1997 | | - |
| 2166 | + |
1998 | 2167 | /** Relevance metrics based on rank (of titles and redirects) */ |
1999 | | - protected Query makeAlttitleRelevance(ArrayList<String> words, float boost){ |
2000 | | - return makeAnchoredQuery(words,fields.alttitle(),new PositionalOptions.Alttitle(),new PositionalOptions.AlttitleWhole(), new PositionalOptions.AlttitleWholeSloppy(),boost,20); |
| 2168 | + protected Query makeAlttitleRelevance(ArrayList<String> words, float boost) { |
| 2169 | + return makeAnchoredQuery(words, fields.alttitle(), |
| 2170 | + new PositionalOptions.Alttitle(), |
| 2171 | + new PositionalOptions.AlttitleWhole(), |
| 2172 | + new PositionalOptions.AlttitleWholeSloppy(), boost, 20); |
2001 | 2173 | } |
2002 | 2174 | |
2003 | | - |
2004 | 2175 | /** Make relevance metrics based on context via related articles */ |
2005 | | - protected Query makeRelatedRelevance(ArrayList<String> words, float boost){ |
2006 | | - return makeAnchoredQuery(words,fields.related(),new PositionalOptions.Related(),null,null,boost,0); |
| 2176 | + protected Query makeRelatedRelevance(ArrayList<String> words, float boost) { |
| 2177 | + return makeAnchoredQuery(words, fields.related(), |
| 2178 | + new PositionalOptions.Related(), null, null, boost, 0); |
2007 | 2179 | } |
2008 | 2180 | |
2009 | | - |
2010 | | - /** Additional query to match words in redirects that are not in title or article */ |
2011 | | - protected Query makeAlttitleForRedirects(ArrayList<String> words, int slop, float boost){ |
2012 | | - return makePositional(words,fields.alttitle(),new PositionalOptions.RedirectMatch(),slop,boost); |
| 2181 | + /** |
| 2182 | + * Additional query to match words in redirects that are not in title or |
| 2183 | + * article |
| 2184 | + */ |
| 2185 | + protected Query makeAlttitleForRedirects(ArrayList<String> words, int slop, |
| 2186 | + float boost) { |
| 2187 | + return makePositional(words, fields.alttitle(), |
| 2188 | + new PositionalOptions.RedirectMatch(), slop, boost); |
2013 | 2189 | } |
2014 | 2190 | |
2015 | | - protected Query makeAlttitleForRedirectsMulti(ParsedWords words, int slop, float boost){ |
2016 | | - return makePositionalMulti(words,fields.alttitle(),new PositionalOptions.RedirectMatch(),slop,boost); |
| 2191 | + protected Query makeAlttitleForRedirectsMulti(ParsedWords words, int slop, |
| 2192 | + float boost) { |
| 2193 | + return makePositionalMulti(words, fields.alttitle(), |
| 2194 | + new PositionalOptions.RedirectMatch(), slop, boost); |
2017 | 2195 | } |
2018 | | - |
2019 | | - /** Make alttitle phrase for titles indexes */ |
2020 | | - public Query makeAlttitleForTitles(List<String> words){ |
| 2196 | + |
| 2197 | + /** Make alttitle phrase for titles indexes */ |
| 2198 | + public Query makeAlttitleForTitles(List<String> words) { |
2021 | 2199 | BooleanQuery main = new BooleanQuery(true); |
2022 | 2200 | |
2023 | | - PositionalQuery exact = new PositionalQuery(new PositionalOptions.AlttitleExact()); |
2024 | | - PositionalQuery sloppy = new PositionalQuery(new PositionalOptions.AlttitleSloppy()); |
| 2201 | + PositionalQuery exact = new PositionalQuery( |
| 2202 | + new PositionalOptions.AlttitleExact()); |
| 2203 | + PositionalQuery sloppy = new PositionalQuery( |
| 2204 | + new PositionalOptions.AlttitleSloppy()); |
2025 | 2205 | |
2026 | 2206 | // make exact + sloppy |
2027 | 2207 | int pos = 0; |
2028 | | - for(String w : words){ |
2029 | | - Term term = new Term(fields.alttitle(),w); |
| 2208 | + for (String w : words) { |
| 2209 | + Term term = new Term(fields.alttitle(), w); |
2030 | 2210 | boolean isStop = stopWords.contains(w); |
2031 | | - exact.add(term,isStop); |
2032 | | - if(!isStop) |
2033 | | - sloppy.add(term,pos,isStop); // maintain gaps |
| 2211 | + exact.add(term, isStop); |
| 2212 | + if (!isStop) |
| 2213 | + sloppy.add(term, pos, isStop); // maintain gaps |
2034 | 2214 | pos++; |
2035 | 2215 | } |
2036 | | - if(sloppy.getTerms().length == 0) |
| 2216 | + if (sloppy.getTerms().length == 0) |
2037 | 2217 | return exact; |
2038 | | - |
| 2218 | + |
2039 | 2219 | sloppy.setSlop(10); |
2040 | | - main.add(exact,Occur.SHOULD); |
2041 | | - main.add(sloppy,Occur.SHOULD); |
| 2220 | + main.add(exact, Occur.SHOULD); |
| 2221 | + main.add(sloppy, Occur.SHOULD); |
2042 | 2222 | main.setBoost(1); |
2043 | 2223 | return main; |
2044 | | - |
| 2224 | + |
2045 | 2225 | } |
2046 | | - |
| 2226 | + |
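makeAlttitleForTitles() above pairs an exact clause that keeps every word with a sloppy clause (slop 10) that drops stop words but preserves their position gaps; when every word is a stop word the sloppy clause stays empty and the exact clause is returned alone. A plain-Java illustration of which word/position pairs land in each clause, with an assumed stop-word set (not part of the changeset):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class AlttitleSplitSketch {
    // assumed stop words for the example
    static final Set<String> STOP = new HashSet<String>(Arrays.asList("the", "of"));

    public static void main(String[] args) {
        List<String> words = Arrays.asList("history", "of", "france");
        List<String> exact = new ArrayList<String>();
        List<String> sloppy = new ArrayList<String>();
        int pos = 0;
        for (String w : words) {
            boolean isStop = STOP.contains(w);
            exact.add(w);                  // every word goes into the exact clause
            if (!isStop)
                sloppy.add(w + "@" + pos); // non-stop words keep their original position
            pos++;
        }
        System.out.println(exact);  // [history, of, france]
        System.out.println(sloppy); // [history@0, france@2]
    }
}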
2047 | 2227 | /** Make a query to search grouped titles indexes */ |
2048 | | - public Query parseForTitles(String queryText){ |
| 2228 | + public Query parseForTitles(String queryText) { |
2049 | 2229 | String oldDefaultField = this.defaultField; |
2050 | 2230 | NamespacePolicy oldPolicy = this.namespacePolicy; |
2051 | 2231 | FieldBuilder.BuilderSet oldBuilder = this.builder; |
2052 | 2232 | this.defaultField = "alttitle"; |
2053 | 2233 | this.namespacePolicy = NamespacePolicy.IGNORE; |
2054 | | - |
| 2234 | + |
2055 | 2235 | Query q = parseRaw(queryText); |
2056 | 2236 | |
2057 | 2237 | ParsedWords words = parsedWords; |
2058 | | - |
2059 | | - this.builder = oldBuilder; |
| 2238 | + |
| 2239 | + this.builder = oldBuilder; |
2060 | 2240 | this.defaultField = oldDefaultField; |
2061 | 2241 | this.namespacePolicy = oldPolicy; |
2062 | | - |
| 2242 | + |
2063 | 2243 | BooleanQuery full = new BooleanQuery(true); |
2064 | | - full.add(q,Occur.MUST); |
| 2244 | + full.add(q, Occur.MUST); |
2065 | 2245 | |
2066 | | - if(words.size() == 0) |
| 2246 | + if (words.size() == 0) |
2067 | 2247 | return q; |
2068 | | - |
2069 | | - // match whole titles |
2070 | | - Query redirectsMulti = makeAlttitleForRedirectsMulti(makeFirstAndSingular(words),20,1f); |
2071 | | - if(redirectsMulti != null) |
2072 | | - full.add(redirectsMulti,Occur.SHOULD); |
2073 | | - |
| 2248 | + |
| 2249 | + // match whole titles |
| 2250 | + Query redirectsMulti = makeAlttitleForRedirectsMulti( |
| 2251 | + makeFirstAndSingular(words), 20, 1f); |
| 2252 | + if (redirectsMulti != null) |
| 2253 | + full.add(redirectsMulti, Occur.SHOULD); |
| 2254 | + |
2074 | 2255 | ArticleNamespaceScaling nsScale = iid.getNamespaceScaling(); |
2075 | | - return new ArticleQueryWrap(full,new ArticleInfoImpl(),null,null,nsScale); |
2076 | | - |
| 2256 | + return new ArticleQueryWrap(full, new ArticleInfoImpl(), null, null, |
| 2257 | + nsScale); |
| 2258 | + |
2077 | 2259 | } |
2078 | | - |
| 2260 | + |
2079 | 2261 | /** check if all the words in the array are stop words */ |
2080 | | - private boolean allStopWords(ArrayList<String> words){ |
2081 | | - if(words == null || words.size() == 0) |
| 2262 | + private boolean allStopWords(ArrayList<String> words) { |
| 2263 | + if (words == null || words.size() == 0) |
2082 | 2264 | return false; |
2083 | | - for(String w : words){ |
2084 | | - if(!stopWords.contains(w)){ |
| 2265 | + for (String w : words) { |
| 2266 | + if (!stopWords.contains(w)) { |
2085 | 2267 | return false; |
2086 | 2268 | } |
2087 | 2269 | } |
— | — | @@ -2091,16 +2273,17 @@ |
2092 | 2274 | public Term[] getHighlightTerms() { |
2093 | 2275 | return highlightTerms; |
2094 | 2276 | } |
2095 | | - |
| 2277 | + |
2096 | 2278 | /** @return if last parsed query had wildcards in it */ |
2097 | | - public boolean hasWildcards(){ |
2098 | | - return wildcards!=null && wildcards.hasWildcards(); |
| 2279 | + public boolean hasWildcards() { |
| 2280 | + return wildcards != null && wildcards.hasWildcards(); |
2099 | 2281 | } |
| 2282 | + |
2100 | 2283 | /** @return if last parsed query has fuzzy words in it */ |
2101 | | - public boolean hasFuzzy(){ |
2102 | | - return fuzzy!=null && fuzzy.hasFuzzy(); |
| 2284 | + public boolean hasFuzzy() { |
| 2285 | + return fuzzy != null && fuzzy.hasFuzzy(); |
2103 | 2286 | } |
2104 | | - |
| 2287 | + |
2105 | 2288 | public void setNamespacePolicy(NamespacePolicy namespacePolicy) { |
2106 | 2289 | this.namespacePolicy = namespacePolicy; |
2107 | 2290 | } |
— | — | @@ -2108,13 +2291,13 @@ |
2109 | 2292 | public ArrayList<String> getWordsClean() { |
2110 | 2293 | return cleanupWords(parsedWords.extractFirst()); |
2111 | 2294 | } |
2112 | | - |
2113 | | - public boolean hasPrefixFilters(){ |
2114 | | - return prefixFilters != null && prefixFilters.length>0; |
| 2295 | + |
| 2296 | + public boolean hasPrefixFilters() { |
| 2297 | + return prefixFilters != null && prefixFilters.length > 0; |
2115 | 2298 | } |
2116 | | - |
| 2299 | + |
2117 | 2300 | /** Gets the raw prefix text, e.g. project:npov */ |
2118 | | - public String[] getPrefixFilters(){ |
| 2301 | + public String[] getPrefixFilters() { |
2119 | 2302 | return prefixFilters; |
2120 | 2303 | } |
2121 | 2304 | |
— | — | @@ -2123,7 +2306,4 @@ |
2124 | 2307 | return urls; |
2125 | 2308 | } |
2126 | 2309 | |
2127 | | - |
2128 | | - |
2129 | | - |
2130 | 2310 | } |