r24043 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r24042 | r24043 | r24044 >
Date:18:51, 12 July 2007
Author:rainman
Status:old
Tags:
Comment:
Don't try to quote already quoted query text in CJK
Modified paths:
  • /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
  • /trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/WikiQueryParserTest.java (modified) (history)

Diff [purge]

Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/test/WikiQueryParserTest.java
@@ -355,8 +355,11 @@
356356 assertEquals("contents:\"うろ ろハ ハン\" title:\"うろ ろハ ハン\"^2.0 (alttitle1:\"うろ ろハ ハン\"^6.0 alttitle2:\"うろ ろハ ハン\"^6.0 alttitle3:\"うろ ろハ ハン\"^6.0)",q.toString());
357357
358358 q = parser.parseFourPass("ナイロン100C other ャポン! ",NamespacePolicy.IGNORE,false);
359 - assertEquals("(+contents:\"ナイ イロ ロン\" +(+contents:100 +contents:c) +contents:other +contents:\"ャホ ホン\") (+title:\"ナイ イロ ロン\"^2.0 +(+title:100^2.0 +title:c^2.0) +title:other^2.0 +title:\"ャホ ホン\"^2.0) ((+alttitle1:\"ナイ イロ ロン\"^6.0 +(+alttitle1:100^6.0 +alttitle1:c^6.0) +alttitle1:other^6.0 +alttitle1:\"ャホ ホン\"^6.0) (+alttitle2:\"ナイ イロ ロン\"^6.0 +(+alttitle2:100^6.0 +alttitle2:c^6.0) +alttitle2:other^6.0 +alttitle2:\"ャホ ホン\"^6.0) (+alttitle3:\"ナイ イロ ロン\"^6.0 +(+alttitle3:100^6.0 +alttitle3:c^6.0) +alttitle3:other^6.0 +alttitle3:\"ャホ ホン\"^6.0))",q.toString());
 359+ assertEquals("(+contents:\"ナイ イロ ロン\" +(+contents:100 +contents:c) +contents:other +contents:\"ャホ ホン\") (+title:\"ナイ イロ ロン\"^2.0 +(+title:100^2.0 +title:c^2.0) +title:other^2.0 +title:\"ャホ ホン\"^2.0) ((+alttitle1:\"ナイ イロ ロン\"^6.0 +(+alttitle1:100^6.0 +alttitle1:c^6.0) +alttitle1:other^6.0 +alttitle1:\"ャホ ホン\"^6.0) (+alttitle2:\"ナイ イロ ロン\"^6.0 +(+alttitle2:100^6.0 +alttitle2:c^6.0) +alttitle2:other^6.0 +alttitle2:\"ャホ ホン\"^6.0) (+alttitle3:\"ナイ イロ ロン\"^6.0 +(+alttitle3:100^6.0 +alttitle3:c^6.0) +alttitle3:other^6.0 +alttitle3:\"ャホ ホン\"^6.0))",q.toString());
360360
 361+ q = parser.parseFourPass("\"うろパン\"",NamespacePolicy.IGNORE,false);
 362+ assertEquals("contents:\"うろ ろハ ハン\" title:\"うろ ろハ ハン\"^2.0 (alttitle1:\"うろ ろハ ハン\"^6.0 alttitle2:\"うろ ろハ ハン\"^6.0 alttitle3:\"うろ ろハ ハン\"^6.0)",q.toString());
 363+
361364 // Test field extraction
362365 HashSet<NamespaceFilter> fs = parser.getFieldNamespaces("main:something [1]:else all:oh []:nja");
363366 assertEquals(3,fs.size());
Index: trunk/lucene-search-2.0/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -1139,8 +1139,12 @@
11401140 boolean prevCJK = false;
11411141 int offset = 0;
11421142 boolean closeQuote = false;
 1143+ boolean inQuotes = false;
11431144 for(int i=0;i<queryText.length();i++){
11441145 c = queryText.codePointAt(i);
 1146+ if(c == '"') inQuotes = !inQuotes;
 1147+ if(inQuotes)
 1148+ continue;
11451149 if(CJKFilter.isCJKChar(c)){
11461150 if(!prevCJK){ // begin of CJK stream
11471151 if(i!=0)

Status & tagging log