r25052 MediaWiki - Code Review archive

Repository: MediaWiki
Revision: < r25051 | r25052 | r25053 >
Date: 15:01, 22 August 2007
Author: rainman
Status: old
Tags:
Comment:
Fix bug 11021. Query parser was splitting up words on combining characters.
Modified paths:
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/test/WikiQueryParserTest.java (modified) (history)

Diff [purge]

Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/test/WikiQueryParserTest.java
@@ -366,6 +366,14 @@
367 367   q = parser.parseFourPass("\"うろパン\"",NamespacePolicy.IGNORE,false);
368 368   assertEquals("contents:\"うろ ろハ ハン\" title:\"うろ ろハ ハン\"^2.0 (alttitle1:\"うろ ろハ ハン\"^6.0 alttitle2:\"うろ ろハ ハン\"^6.0 alttitle3:\"うろ ろハ ハン\"^6.0)",q.toString());
369 369
    370 + // Malayalam
    371 + analyzer = Analyzers.getSearcherAnalyzer("ml");
    372 + bs = new FieldBuilder("ml").getBuilder();
    373 + parser = new WikiQueryParser(bs.getFields().contents(),"0",analyzer,bs,NamespacePolicy.IGNORE);
    374 + q = parser.parseFourPass("കൊറിയ ",NamespacePolicy.IGNORE,false);
    375 + assertEquals("contents:കറയ title:കറയ^2.0 (alttitle1:കറയ^6.0 alttitle2:കറയ^6.0 alttitle3:കറയ^6.0)",q.toString());
    376 +
    377 +
370 378   // Test field extraction
371 379   HashSet<NamespaceFilter> fs = parser.getFieldNamespaces("main:something [1]:else all:oh []:nja");
372 380   assertEquals(3,fs.size());
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -296,7 +296,7 @@
297 297   continue; // ignore whitespaces
298 298
299 299   // pluses and minuses, underscores can be within words, *,? are for wildcard queries
300     - if(Character.isLetterOrDigit(ch) || ch=='-' || ch=='+' || ch=='_' || ch=='*'){
    300 + if(!Character.isWhitespace(ch) && ch != ':' && ch != '(' && ch != ')' && ch !='[' && ch != ']' && ch != '.' && ch != ',' && ch != ';' && ch != '"'){
301 301   if(length<buffer.length)
302 302   buffer[length++] = ch;
303 303   } else{

Status & tagging log