r50209 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r50208 | r50209 | r50210 >
Date:23:12, 4 May 2009
Author:rainman
Status:deferred
Tags:
Comment:
Minor stuff:
* more restrictive wildcards
* support searchall query parameter
Modified paths:
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java (modified) (history)

Diff [purge]

Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearchEngine.java
@@ -96,7 +96,7 @@
97 97 if (what.equals("search") || what.equals("explain")) {
98 98 int offset = 0, limit = 20; boolean exactCase = false;
99 99 int iwlimit = 10; int iwoffset = 0;
100 - boolean searchOnly = false;
 100+ boolean searchOnly = false, searchAll = false;
101 101 if (query.containsKey("offset"))
102 102 offset = Math.max(Integer.parseInt((String)query.get("offset")), 0);
103 103 if (query.containsKey("limit"))
@@ -109,10 +109,17 @@
110 110 exactCase = true;
111 111 if(query.containsKey("searchonly"))
112 112 searchOnly = Boolean.parseBoolean((String)query.get("searchonly"));
 113+ if(query.containsKey("searchall"))
 114+ searchAll = Boolean.parseBoolean((String)query.get("searchall")) ||
 115+ ((String)query.get("searchall")).equals("1");
113 116 if(version <= 2)
114 117 searchOnly = true;
115 - NamespaceFilter namespaces = new NamespaceFilter((String)query.get("namespaces"));
116 - SearchResults res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly);
 118+ NamespaceFilter namespaces = null;
 119+ if(searchAll)
 120+ namespaces = new NamespaceFilter("");
 121+ else
 122+ namespaces = new NamespaceFilter((String)query.get("namespaces"));
 123+ SearchResults res = search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("explain"), exactCase, false, searchOnly, searchAll);
117 124 if(!res.isSuccess()){
118 125 // note failed search
119 126 if(SearchServer.stats != null)
@@ -132,6 +139,7 @@
133 140 } else if (what.equals("raw") || what.equals("rawexplain")) {
134 141 int offset = 0, limit = 20; boolean exactCase = false;
135 142 int iwlimit = 10; int iwoffset = 0;
 143+ boolean searchAll = false;
136 144 if (query.containsKey("offset"))
137 145 offset = Math.max(Integer.parseInt((String)query.get("offset")), 0);
138 146 if (query.containsKey("limit"))
@@ -142,8 +150,10 @@
143 151 iwlimit = Math.min(Integer.parseInt((String)query.get("iwlimit")), MAXLINES);
144 152 if (query.containsKey("case") && global.exactCaseIndex(iid.getDBname()) && ((String)query.get("case")).equalsIgnoreCase("exact"))
145 153 exactCase = true;
 154+ if(query.containsKey("searchall"))
 155+ searchAll = Boolean.parseBoolean((String)query.get("searchall"));
146 156 NamespaceFilter namespaces = new NamespaceFilter((String)query.get("namespaces"));
147 - return search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("rawexplain"), exactCase, true, true);
 157+ return search(iid, searchterm, offset, limit, iwoffset, iwlimit, namespaces, what.equals("rawexplain"), exactCase, true, true, searchAll);
148 158 } else if (what.equals("prefix")){
149 159 int limit = MAXPREFIX;
150 160 if (query.containsKey("limit"))
@@ -577,7 +587,8 @@
578 588 * the default namespaces filter
579 589 */
580 590 public SearchResults search(IndexId iid, String searchterm, int offset, int limit, int iwoffset, int iwlimit,
581 - NamespaceFilter nsDefault, boolean explain, boolean exactCase, boolean raw, boolean searchOnly){
 591+ NamespaceFilter nsDefault, boolean explain, boolean exactCase, boolean raw, boolean searchOnly,
 592+ boolean searchAllFromRequest){
582 593 Analyzer analyzer = Analyzers.getSearcherAnalyzer(iid,exactCase);
583 594 if(nsDefault == null || nsDefault.cardinality() == 0)
584 595 nsDefault = new NamespaceFilter("0"); // default to main namespace
@@ -599,8 +610,12 @@
600 611 return res;
601 612 }
602 613
 614+ // check if request is explicitely on one field
 615+ if(searchAllFromRequest){
 616+ nsfw.setNamespaceFilter(new NamespaceFilter());
 617+ searchAll = true;
603 618 // if search is over one field, try to use filters
604 - if(fields.size()==1){
 619+ } else if(fields.size()==1){
605 620 if(fields.contains(new NamespaceFilter())){
606 621 nsfw.setNamespaceFilter(new NamespaceFilter()); // empty filter: "all" keyword
607 622 searchAll = true;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java
@@ -1035,33 +1035,38 @@
1036 1036 return query;
1037 1037 }
1038 1038
1039 - /** return true if buffer is wildcard */
 1039+ /**
 1040+ * return true if buffer is wildcard
 1041+ * the only allowed patterns are *q and q* and not other combinations like *q* or q*r
 1042+ *
 1043+ */
1040 1044 private boolean bufferIsWildCard(){
1041 - if(length < 1)
 1045+ if(length < 2)
1042 1046 return false;
1043 1047 boolean wild = false;
1044 1048 int index = -1;
1045 - for(int i=0;i<length;i++){
1046 - if(buffer[i] == '*'){
1047 - wild = true;
1048 - index = i;
1049 - break;
1050 - }
 1049+ // only allow '*' at begin and end
 1050+ if(buffer[0] == '*'){
 1051+ index = 0;
 1052+ wild = true;
 1053+ } else if( buffer[length-1] == '*' ){
 1054+ index = length-1;
 1055+ wild = true;
1051 1056 }
 1057+
1052 1058 // check if it's a valid wildcard
1053 1059 if(wild){
1054 - if((buffer[0] == '*') && (buffer[length-1]=='*'))
1055 - return false; // don't support patterns like *a*
1056 - //if(index == length-1 && buffer[index]=='?')
1057 - // return false; // probably just an ordinary question mark
 1060+ // check if this is the only asterix
 1061+ for(int i=0;i<length;i++){
 1062+ if( i!= index && buffer[i] == '*'){
 1063+ return false; // more than one '*'
 1064+ }
 1065+ }
1058 1066
1059 - // don't let * be in middle for performance reasons
1060 - if(Character.isLetterOrDigit(buffer[0]) && Character.isLetterOrDigit(buffer[length-1]))
1061 - return false;
1062 -
 1067+ // require at least one letter besides the wildcard sign
1063 1068 for(int i=0;i<length;i++){
1064 1069 if(Character.isLetterOrDigit(buffer[i]))
1065 - return true; // +card :P
 1070+ return true; // found it!
1066 1071 }
1067 1072 }
1068 1073 return false;

Status & tagging log