r63854 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r63853‎ | r63854 | r63855 >
Date:02:23, 17 March 2010
Author:mah
Status:deferred
Tags:
Comment:
various ws changes in files I looked at
Modified paths:
  • /trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemFilter.java (modified) (history)
  • /trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemmer.java (modified) (history)
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/config/GlobalConfiguration.java (modified) (history)
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/search/NamespaceFilter.java (modified) (history)
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/storage/MySQLStorage.java (modified) (history)
  • /trunk/lucene-search-2/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java (modified) (history)
  • /trunk/lucene-search-2/test/org/wikimedia/lsearch/config/GlobalConfigurationTest.java (modified) (history)

Diff [purge]

Index: trunk/lucene-search-2/test/org/wikimedia/lsearch/config/GlobalConfigurationTest.java
@@ -44,7 +44,7 @@
4545
4646 public void testReadURL(){
4747 // database
48 - Hashtable database = global.database;
 48+ Hashtable database = global.database;
4949 Hashtable roles = (Hashtable) database.get("entest");
5050 assertNotNull(roles.get("mainsplit"));
5151 assertNotNull(roles.get("mainpart"));
@@ -67,7 +67,7 @@
6868
6969 // search
7070 Hashtable search = global.search;
71 - ArrayList sr = (ArrayList) search.get("192.168.0.2");
 71+ ArrayList sr = (ArrayList) search.get("192.168.0.2");
7272
7373 String[] ssr = (String[]) sr.toArray(new String [] {} );
7474
@@ -82,12 +82,12 @@
8383 Hashtable<String,ArrayList<String>> g0 = sg.get(new Integer(0));
8484 assertEquals("{192.168.0.5=[entest.mainpart, entest.restpart], 192.168.0.2=[entest.mainpart]}",g0.toString());
8585 Hashtable<String,ArrayList<String>> g1 = sg.get(new Integer(1));
86 - assertEquals("{192.168.0.6=[frtest.part3, detest], 192.168.0.4=[frtest.part1, frtest.part2]}",g1.toString());
 86+ assertEquals("{192.168.0.6=[frtest.part3, detest], 192.168.0.4=[frtest.part1, frtest.part2]}",g1.toString());
8787
8888
8989 // index
9090 Hashtable index = global.index;
91 - ArrayList ir = (ArrayList) index.get("192.168.0.5");
 91+ ArrayList ir = (ArrayList) index.get("192.168.0.5");
9292
9393 String[] sir = (String[]) ir.toArray(new String [] {} );
9494
@@ -115,7 +115,7 @@
116116 String hostName = host.getHostName();
117117 System.out.println("Verify internet IP: "+hostAddr+", and hostname: "+hostName);
118118
119 - // test prefixes
 119+ // test prefixes
120120 Hashtable<String,NamespaceFilter> p = global.namespacePrefix;
121121 assertEquals(17,p.size());
122122
@@ -298,7 +298,7 @@
299299 //assertTrue(tn.isTitleNgram());
300300
301301 }
302 -
 302+
303303 public void testComplexWildcard(){
304304 Pattern p = Pattern.compile(StringUtils.wildcardToRegexp("(?!(enwiki.|dewiki.|frwiki.|itwiki.|nlwiki|.))*.spell"));
305305 assertFalse(p.matcher("enwiki.spell").matches());
Index: trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemmer.java
@@ -1233,7 +1233,7 @@
12341234 if (dict_ht == null)
12351235 initializeDictHash();
12361236 }
1237 -
 1237+
12381238 public String singular(String term) {
12391239 boolean stemIt;
12401240 String result;
@@ -1281,17 +1281,17 @@
12821282 while (true) {
12831283 entry = wordInDict();
12841284 if (entry != null) break;
1285 - plural();
 1285+ plural();
12861286 break;
12871287 }
1288 -
 1288+
12891289 if (entry != null) {
12901290 if (entry.root != null)
12911291 result = entry.root;
12921292 else
12931293 result = word.toString();
12941294 } else result = word.toString();
1295 -
 1295+
12961296 /* Enter into cache, at the place not used by the last cache hit */
12971297 if (stem_ht.size() < MaxCacheSize) {
12981298 /* Add term to cache */
Index: trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemFilter.java
@@ -102,7 +102,7 @@
103103 return null;
104104 else {
105105 String s = stemmer.stem(token.termText());
106 - if (!s.equals(token.termText()))
 106+ if (!s.equals(token.termText()))
107107 return new Token(s, token.startOffset, token.endOffset, token.type);
108108 return token;
109109 }
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/storage/MySQLStorage.java
@@ -26,8 +26,8 @@
2727
2828 /**
2929 * MySQL storage backend
30 - *
31 - *
 30+ *
 31+ *
3232 * @author rainman
3333 *
3434 */
@@ -52,10 +52,10 @@
5353 /** db where to put everything, if we are not using one db per dbname */
5454 protected String defaultDB;
5555 /** where sql stuff is, e.g. references_table.sql */
56 - protected String lib;
57 - /** table name -> create table file */
 56+ protected String lib;
 57+ /** table name -> create table file */
5858 protected Hashtable<String,String> tableDefs = new Hashtable<String,String>();
59 -
 59+
6060 protected MySQLStorage() {
6161 config = Configuration.open();
6262 try {
@@ -63,9 +63,9 @@
6464 } catch (ClassNotFoundException e) {
6565 log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage(),e);
6666 }
67 -
 67+
6868 lib = config.getString("Storage","lib","./sql");
69 -
 69+
7070 master = config.getString("Storage","master","localhost");
7171 String[] ss = config.getArray("Storage","slaves");
7272 if(ss != null){
@@ -85,15 +85,15 @@
8686 slaves = new Hashtable<String,Double>();
8787 for(Entry<String,Double> ed : rawslaves.entrySet())
8888 slaves.put(ed.getKey(),ed.getValue()/sum);
89 -
 89+
9090 }
91 -
 91+
9292 username = config.getString("Storage","username","root");
9393 password = config.getString("Storage","password","");
94 -
 94+
9595 adminUsername = config.getString("Storage","adminuser",username);
9696 adminPassword = config.getString("Storage","adminpass",password);
97 -
 97+
9898 // figure out db configuration
9999 separate = config.getBoolean("Storage","useSeparateDBs");
100100 if(!separate){
@@ -102,23 +102,23 @@
103103 log.error("Set Storage.defaultDB in local configuration.");
104104 }
105105 }
106 - }
107 -
 106+ }
 107+
108108 /** Get connection for reading of (possibly lagged) stuff, i.e. on slaves (or master if there are no slaves) */
109109 protected Connection getReadConnection(String dbname) throws IOException{
110110 return openConnection(dbname,false,false);
111111 }
112 -
 112+
113113 /** Get connection for writing stuff, i.e. on the master */
114114 protected Connection getWriteConnection(String dbname) throws IOException{
115115 return openConnection(dbname,true,false);
116116 }
117 -
 117+
118118 /** Get administrators connection for creating tables/db, etc.. (on master) */
119119 protected Connection getAdminConnection(String dbname) throws IOException {
120120 return openConnection(dbname,true,true);
121121 }
122 -
 122+
123123 /** Open connection on the master, or load-balanced on one of the slaves */
124124 protected Connection openConnection(String dbname, boolean onMaster, boolean admin) throws IOException {
125125 String host=null;
@@ -153,26 +153,26 @@
154154 public String quote(String str){
155155 return "'"+str+"'";
156156 }
157 -
 157+
158158 public String escape(String str){
159159 return str.replace("\\","\\\\").replace("'","\\'");
160160 }
161 -
 161+
162162 public String getTableName(String name, String dbname){
163163 if(!separate)
164164 return dbname+"_"+name;
165165 else
166166 return name;
167167 }
168 -
169 - // inherit javadoc
170 - public Collection<CompactArticleLinks> getPageReferences(Collection<Title> titles, String dbname) throws IOException {
 168+
 169+ // inherit javadoc
 170+ public Collection<CompactArticleLinks> getPageReferences(Collection<Title> titles, String dbname) throws IOException {
171171 String sql = "SELECT page_key, page_references from "+getTableName("page",dbname)+" WHERE ";
172172 if(titles == null || titles.size()==0)
173173 return new ArrayList<CompactArticleLinks>();
174174 else if(titles.size()==1){
175175 sql += "page_key="+quote(escape(titles.iterator().next().getKey()));
176 - } else{
 176+ } else{
177177 StringBuilder sb = new StringBuilder(sql);
178178 sb.append("page_key IN (");
179179 Iterator<Title> it = titles.iterator();
@@ -193,7 +193,7 @@
194194 ResultSet res = stmt.executeQuery(sql);
195195 ArrayList<CompactArticleLinks> ret = new ArrayList<CompactArticleLinks>();
196196 while(res.next()){
197 - ret.add(new CompactArticleLinks(res.getString("page_key"),res.getInt("page_references")));
 197+ ret.add(new CompactArticleLinks(res.getString("page_key"),res.getInt("page_references")));
198198 }
199199 conn.close();
200200 return ret;
@@ -205,12 +205,12 @@
206206
207207 // inherit javadoc
208208 public void storePageReferences(Collection<CompactArticleLinks> refs, String dbname) throws IOException {
209 - final int maxPerQuery = 10000;
 209+ final int maxPerQuery = 10000;
210210 Connection conn = getWriteConnection(dbname);
211 - verifyTable("page",dbname,conn);
 211+ verifyTable("page",dbname,conn);
212212 Iterator<CompactArticleLinks> it = refs.iterator();
213 - // send chunks of maxPerQuery referenace replacements
214 - while(it.hasNext()){
 213+ // send chunks of maxPerQuery referenace replacements
 214+ while(it.hasNext()){
215215 StringBuilder sb = new StringBuilder("INSERT INTO "+getTableName("page",dbname)+" (page_key,page_references) VALUES ");
216216 int count = 0;
217217 while(it.hasNext() && count < maxPerQuery){
@@ -228,19 +228,19 @@
229229 try {
230230 log.info("Storing "+Math.min(maxPerQuery,count)+" page ranks... ");
231231 Statement stmt = conn.createStatement();
232 - stmt.executeUpdate(sb.toString());
233 -
 232+ stmt.executeUpdate(sb.toString());
 233+
234234 } catch (SQLException e) {
235235 log.error("Cannot execute replace query "+sb+" : "+e.getMessage());
236236 throw new IOException(e.getMessage());
237 - }
 237+ }
238238 }
239239 try {
240240 conn.close(); // be sure we close the connection
241241 } catch (SQLException e) {
242242 }
243243 }
244 -
 244+
245245 /** Creates table if it doesn't exist */
246246 protected void verifyTable(String name, String dbname, Connection conn) throws IOException {
247247 // verify if table exists
@@ -251,19 +251,19 @@
252252 ResultSet res = stmt.executeQuery("SHOW TABLES LIKE '"+table+"';");
253253 if(res.next()) // table exists!
254254 return;
255 -
 255+
256256 } catch (SQLException e) {
257257 log.error("Cannot verify table "+table+" : "+e.getMessage());
258258 throw new IOException(e.getMessage());
259259 }
260 -
 260+
261261 // fetch table definition
262262 String def = tableDefs.get(name);
263263 if(def == null){
264264 if(!lib.endsWith(Configuration.PATH_SEP))
265265 lib = lib+Configuration.PATH_SEP;
266 -
267 - BufferedReader file = new BufferedReader(new FileReader(lib+name+"_table.sql"));
 266+
 267+ BufferedReader file = new BufferedReader(new FileReader(lib+name+"_table.sql"));
268268 StringBuilder sb = new StringBuilder();
269269 String line;
270270 while((line = file.readLine()) != null){
@@ -276,7 +276,7 @@
277277 if(!separate)
278278 tdef = def.replace("/*DBprefix*/",dbname+"_");
279279 else
280 - tdef = def;
 280+ tdef = def;
281281 // create
282282 try {
283283 Connection admin = getAdminConnection(dbname);
@@ -287,7 +287,7 @@
288288 } catch (SQLException e) {
289289 log.error("Cannot create table "+table+" : "+e.getMessage());
290290 throw new IOException(e.getMessage());
291 - }
 291+ }
292292 }
293293
294294 @Override
@@ -299,7 +299,7 @@
300300 return new HashMap<Title, ArrayList<RelatedTitle>>();
301301 else if(titles.size()==1){
302302 sql += "a.page_key="+quote(escape(titles.iterator().next().getKey()));
303 - } else{
 303+ } else{
304304 StringBuilder sb = new StringBuilder(sql);
305305 sb.append("a.page_key IN (");
306306 Iterator<Title> it = titles.iterator();
@@ -328,7 +328,7 @@
329329 rel = new ArrayList<RelatedTitle>();
330330 ret.put(t1,rel);
331331 }
332 - rel.add(new RelatedTitle(t2,score));
 332+ rel.add(new RelatedTitle(t2,score));
333333 }
334334 conn.close();
335335 return ret;
@@ -337,12 +337,12 @@
338338 throw new IOException(e.getMessage());
339339 }
340340 }
341 -
342 - protected HashMap<String,Integer> getPageIDs(Collection<String> keys, String dbname, Connection conn) throws IOException{
 341+
 342+ protected HashMap<String,Integer> getPageIDs(Collection<String> keys, String dbname, Connection conn) throws IOException{
343343 String sql = "SELECT page_key, page_id from "+getTableName("page",dbname)+" WHERE ";
344344 if(keys.size()==1){
345345 sql += "page_key="+quote(escape(keys.iterator().next()));
346 - } else{
 346+ } else{
347347 StringBuilder sb = new StringBuilder(sql);
348348 sb.append("page_key IN (");
349349 Iterator<String> it = keys.iterator();
@@ -380,12 +380,12 @@
381381 keys.add(r.getRelates().toString());
382382 }
383383 HashMap<String,Integer> map = getPageIDs(keys,dbname,read);
384 - final int maxPerQuery = 20000;
 384+ final int maxPerQuery = 20000;
385385 Connection write = getWriteConnection(dbname);
386 - verifyTable("related",dbname,write);
 386+ verifyTable("related",dbname,write);
387387 Iterator<Related> it = related.iterator();
388 - // send chunks of maxPerQuery referenace replacements
389 - while(it.hasNext()){
 388+ // send chunks of maxPerQuery referenace replacements
 389+ while(it.hasNext()){
390390 StringBuilder sb = new StringBuilder("INSERT INTO "+getTableName("related",dbname)+" (rel_to,rel_related,rel_score) VALUES ");
391391 int count = 0;
392392 while(it.hasNext() && count < maxPerQuery){
@@ -405,18 +405,18 @@
406406 try {
407407 log.info("Storing "+Math.min(maxPerQuery,count)+" related pages... ");
408408 Statement stmt = write.createStatement();
409 - stmt.executeUpdate(sb.toString());
410 -
 409+ stmt.executeUpdate(sb.toString());
 410+
411411 } catch (SQLException e) {
412412 log.error("Cannot execute replace query "+sb+" : "+e.getMessage());
413413 throw new IOException(e.getMessage());
414 - }
 414+ }
415415 }
416416 try {
417417 write.close(); // be sure we close the connection
418418 read.close();
419419 } catch (SQLException e) {
420420 }
421 -
422 - }
 421+
 422+ }
423423 }
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/config/GlobalConfiguration.java
@@ -41,8 +41,8 @@
4242 /**
4343 * Read and parse the global configuration file, is also used
4444 * to discover where the indexes are. Global configuration manages
45 - * a pool of global readonly {@link IndexId} instances.
46 - *
 45+ * a pool of global readonly {@link IndexId} instances.
 46+ *
4747 * @author rainman
4848 *
4949 */
@@ -50,7 +50,7 @@
5151 /** The following hashtables are directly read from the config file: */
5252 /** dbname -> hashtable ( role -> hashtable ( param => value) ) */
5353 protected Hashtable<String, Hashtable<String, Hashtable<String, String>>> database;
54 - /** group -> host -> arraylist ( db.role ) */
 54+ /** group -> host -> arraylist ( db.role ) */
5555 protected Hashtable<Integer,Hashtable<String,ArrayList<String>>> searchGroup;
5656 /** host -> arraylist ( db.role ) */
5757 protected Hashtable<String,ArrayList<String>> search;
@@ -73,7 +73,7 @@
7474 /** prefixes, e.g. main, talk, help, and corresponding filters */
7575 protected Hashtable<String,NamespaceFilter> namespacePrefix;
7676 /** keyword for all namespaces (i.e. no filtering) */
77 - protected String namespacePrefixAll;
 77+ protected String namespacePrefixAll;
7878 /** suffix -> OAI Repo url pattern */
7979 protected Hashtable<String,String> oaiRepo;
8080 /** wgLanguageCode from InitialiseSettings, suffix -> lang code */
@@ -92,50 +92,50 @@
9393 protected String commonsWiki = null;
9494 /** dbname -> namespace scaling */
9595 protected Hashtable<String,ArticleNamespaceScaling> namespaceBoost = new Hashtable<String,ArticleNamespaceScaling>();
96 -
 96+
9797 /** info about this host */
9898 protected static InetAddress myHost;
99 - protected static String hostAddr, hostName;
100 -
 99+ protected static String hostAddr, hostName;
 100+
101101 /** Database suffix if dbname, the rest is supposed to be language, e.g srwiki => (suffix wiki) => sr */
102 - protected String[] databaseSuffixes = null;
 102+ protected String[] databaseSuffixes = null;
103103 /** Databases ending in suffix will use additional keyword scores */
104104 protected String[] keywordScoringSuffixes = null;
105105 /** Databases ending in suffix will have 2 indexes, one with lowercased words, and one with exact case words */
106106 protected String[] exactCaseSuffix = null;
107 -
 107+
108108 /** For scaling scores according how old the indexed page is */
109109 protected String[] ageScalingStrong = null;
110110 protected String[] ageScalingMedium = null;
111111 protected String[] ageScalingWeak = null;
112 -
 112+
113113 /** wikis with additional global ranking data */
114114 protected String[] additionalRank = null;
115 -
 115+
116116 protected Properties globalProperties = null;
117 -
 117+
118118 /** All identifiers of all indexes (dbrole -> IndexId) */
119119 protected static Hashtable<String,IndexId> indexIdPool = new Hashtable<String,IndexId>();
120 -
 120+
121121 protected static GlobalConfiguration instance = null;
122 -
 122+
123123 /** All the lang codes we encountered, used for "smart interwiki" */
124124 protected HashSet<String> smartInterwikiCodes = new HashSet<String>();
125125 protected boolean useSmartInterwiki = false;
126126 protected int maxSearchLimit = 1000;
127 - protected int maxSearchOffset = 1000000;
128 -
 127+ protected int maxSearchOffset = 1000000;
 128+
129129 /** Whether to report warnings and info */
130130 protected static boolean verbose = true;
131 -
 131+
132132 /** Sections in lsearch-config.conf */
133133 protected static enum Section { DATABASE, INDEX, SEARCH, INDEXPATH, NAMESPACE_PREFIX, OAI, DATABASE_GROUP, NAMESPACE_BOOST };
134134
135135
136136 /**
137 - * Use this function to override the hosts IP address which
138 - * is determined automatically when first instance is made
139 - *
 137+ * Use this function to override the hosts IP address which
 138+ * is determined automatically when first instance is made
 139+ *
140140 * @param host IP adress
141141 */
142142 public static void setHost(InetAddress host){
@@ -143,15 +143,15 @@
144144 hostAddr = myHost.getHostAddress();
145145 hostName = myHost.getHostName();
146146 }
147 -
 147+
148148 protected GlobalConfiguration(){
149149 // try to determin this hosts IP address
150150 determineInetAddress();
151 - }
152 -
 151+ }
 152+
153153 /**
154154 * Get singleton instance of this class
155 - *
 155+ *
156156 * @return
157157 */
158158 synchronized public static GlobalConfiguration getInstance() {
@@ -159,12 +159,12 @@
160160 instance = new GlobalConfiguration();
161161 return instance;
162162 }
163 -
 163+
164164 /**
165 - * Try to determine the (non-loopback) IP address of this
166 - * computer. Will work only if there is one attached
 165+ * Try to determine the (non-loopback) IP address of this
 166+ * computer. Will work only if there is one attached
167167 * network interface, otherwise @link setHost() method
168 - * need to be used
 168+ * need to be used
169169 */
170170 protected void determineInetAddress() {
171171 try {
@@ -172,27 +172,27 @@
173173 } catch (UnknownHostException e1) {
174174 System.out.println("Error resolving local hostname. Make sure that hostname is setup correctly.");
175175 e1.printStackTrace();
176 - }
 176+ }
177177 }
178 -
 178+
179179 /** Return true if host is the current host (IP or hostname) */
180180 public boolean isLocalhost(String host){
181181 if(host == null)
182182 return false;
183183 return host.equalsIgnoreCase(hostAddr) || host.equalsIgnoreCase(hostName);
184184 }
185 -
 185+
186186 /** Secure add-to-list, check if the index definition exists, and avoid duplicates in list */
187187 protected void addToList(ArrayList<String> list, String str){
188188 if(!list.contains(str)){
189189 String[] parts = str.split("\\.");
190 - if(database.containsKey(parts[0]) &&
 190+ if(database.containsKey(parts[0]) &&
191191 (parts.length==1 || (parts.length==2 && database.get(parts[0]).containsKey(parts[1])))){
192192 list.add(str);
193193 }
194194 }
195195 }
196 -
 196+
197197 @SuppressWarnings("unchecked")
198198 protected void checkSubdivisions(String dbname, String type){
199199 Hashtable<String,Hashtable<String,String>> typeParams = database.get(dbname);
@@ -204,11 +204,11 @@
205205 typeParams.put(type+".sub"+i,(Hashtable<String, String>) params.clone());
206206 }
207207 }
208 -
209 - /**
 208+
 209+ /**
210210 * Check if the setup is correct,i.e. there is indexer and searcher
211211 * for each db ...
212 - *
 212+ *
213213 * @return returns true if the setup is OK
214214 */
215215 public boolean checkIntegrity(){
@@ -260,7 +260,7 @@
261261 // expand logical index names on searchers
262262 for(String host : search.keySet()){
263263 ArrayList<String> hostsearch = search.get(host);
264 - for(String dbname : hostsearch.toArray(new String[]{})){
 264+ for(String dbname : hostsearch.toArray(new String[]{})){
265265 Hashtable<String, Hashtable<String,String>> types = database.get(dbname);
266266 if(types != null){ // if not null, dbrole is dbname
267267 if(types.containsKey("mainsplit")){
@@ -298,7 +298,7 @@
299299 } else if(typeid.matches("nspart[1-9][0-9]*")){
300300 type = "nssplit";
301301 dbrole = dbname + "." + typeid;
302 - } else if(typeid.equals("spell") || typeid.equals("links") || typeid.equals("related")
 302+ } else if(typeid.equals("spell") || typeid.equals("links") || typeid.equals("related")
303303 || typeid.equals("prefix") || typeid.equals("title_ngram")){
304304 type = typeid;
305305 dbrole = dbname + "." + typeid;
@@ -307,7 +307,7 @@
308308 dbrole = dbname + "." + typeid;
309309 } else
310310 continue; // uknown type, skip
311 -
 311+
312312 if(indexLocation.get(dbrole) == null){
313313 // fill-in with host that indexes dbname
314314 if(indexLocation.get(dbname) != null ){
@@ -319,13 +319,13 @@
320320 return false;
321321 } */
322322 // add same index location for highlight .hl index
323 - /* String host = indexLocation.get(dbrole);
 323+ /* String host = indexLocation.get(dbrole);
324324 indexLocation.put(dbrole+".hl",host);
325325 index.get(host).add(dbrole+".hl"); */
326 -
 326+
327327 }
328 - /* boolean searched = (getSearchHosts(dbrole).size() != 0);
329 - if(!searched && !(typeid.equals("mainsplit") || typeid.equals("split")
 328+ /* boolean searched = (getSearchHosts(dbrole).size() != 0);
 329+ if(!searched && !(typeid.equals("mainsplit") || typeid.equals("split")
330330 || typeid.equals("nssplit") || typeid.equals("links") || typeid.equals("related") || typeid.equals("title_ngram"))){
331331 if(verbose)
332332 System.out.println("WARNING: in Global Configuration: index "+dbrole+" is not searched by any host.");
@@ -334,10 +334,10 @@
335335 }
336336 return true;
337337 }
338 -
339 - /**
 338+
 339+ /**
340340 * Read a config file from a given URL
341 - *
 341+ *
342342 * @param url
343343 * @throws IOException
344344 */
@@ -351,15 +351,15 @@
352352 } catch (IOException e) {
353353 System.out.println("I/O Error in opening or reading global config at url "+url);
354354 throw e;
355 - }
 355+ }
356356 }
357357
358358 /**
359359 * Prepare hashtables to load data into them
360 - *
 360+ *
361361 */
362362 protected void init(){
363 - database = new Hashtable<String, Hashtable<String, Hashtable<String, String>>>();
 363+ database = new Hashtable<String, Hashtable<String, Hashtable<String, String>>>();
364364 searchGroup = new Hashtable<Integer,Hashtable<String, ArrayList<String>>>();
365365 search = new Hashtable<String, ArrayList<String>>();
366366 searchWildcard = new Hashtable<String, ArrayList<Pattern>>();
@@ -373,46 +373,46 @@
374374 oaiRepo = new Hashtable<String,String>();
375375 indexWildcard = new Hashtable<Pattern,String>();
376376 }
377 -
 377+
378378 protected String[] getArrayProperty(String name){
379379 String s = globalProperties.getProperty(name);
380380 if (s != null)
381381 return s.split(" ");
382382 return null;
383383 }
384 -
385 - /**
 384+
 385+ /**
386386 * Reads a config file from a bufferedreader, will
387387 * close the reader when done.
388 - *
 388+ *
389389 * @param in opened reader
390390 * @throws IOException
391391 */
392392 protected void read(BufferedReader in, String indexpath) throws IOException{
393 - String line="";
394 - Section section = null;
 393+ String line="";
 394+ Section section = null;
395395 Pattern roleRegexp = Pattern.compile("\\((.*?)\\)");
396396 int lineNum = 0;
397 - // sections
 397+ // sections
398398 int searchGroupNum = -1;
399 -
 399+
400400 init();
401401 this.indexPath = indexpath;
402 -
 402+
403403 while((line = in.readLine()) != null){
404404 lineNum ++;
405405 // strip comments
406406 line = line.replaceFirst("#.*","");
407 -
 407+
408408 if(line.trim().equals(""))
409 - continue;
410 -
 409+ continue;
 410+
411411 line = preprocessLine(line);
412 -
 412+
413413 if(line.startsWith("[") && line.length()>2 && !Character.isDigit(line.charAt(1))){ // section
414414 int last = line.indexOf("]");
415415 String s = line.substring(1,last);
416 -
 416+
417417 if(s.equalsIgnoreCase("properties")){
418418 globalProperties = new Properties();
419419 StringBuilder prop = new StringBuilder(line+"\n");
@@ -421,7 +421,7 @@
422422 break;
423423 prop.append(line);
424424 prop.append("\n");
425 - }
 425+ }
426426 globalProperties.load(new ByteArrayInputStream(prop.toString().getBytes("utf-8")));
427427 // get some predifined global properties
428428 this.databaseSuffixes = getArrayProperty("Database.suffix");
@@ -445,7 +445,7 @@
446446 last = line.indexOf("]");
447447 s = line.substring(1,last);
448448 }
449 -
 449+
450450 if(s.equalsIgnoreCase("database"))
451451 section = Section.DATABASE;
452452 else if(s.equalsIgnoreCase("index"))
@@ -468,33 +468,33 @@
469469 System.out.println("Ignoring a line up to first section heading...");
470470 } else if(section == Section.DATABASE || section == Section.DATABASE_GROUP){
471471 String[] parts = splitBySemicolon(line,lineNum);
472 - if(parts == null) continue;
 472+ if(parts == null) continue;
473473 String[] dbs = parts[0].split(",");
474474 for(int i=0;i<dbs.length;i++) dbs[i]=dbs[i].trim();
475 -
476 - // syntax: dbname : (role,params), (role2,params2)
 475+
 476+ // syntax: dbname : (role,params), (role2,params2)
477477 Matcher matcher = roleRegexp.matcher(parts[1]);
478478 while(matcher.find()){
479 - processDBRole(dbs,matcher.group(1));
 479+ processDBRole(dbs,matcher.group(1));
480480 }
481481 } else if(section == Section.SEARCH){
482482 String[] parts = splitBySemicolon(line,lineNum);
483483 if(parts == null) continue;
484484 String host = parts[0].trim();
485 -
 485+
486486 processSearchRoles( host, parts[1], searchGroupNum);
487487 } else if(section == Section.INDEX){
488488 String[] parts = splitBySemicolon(line,lineNum);
489489 if(parts == null) continue;
490490 String host = parts[0].trim();
491 -
 491+
492492 processIndexRoles(host,parts[1]);
493493 } else if(section == Section.INDEXPATH){
494494 String[] parts = splitBySemicolon(line,lineNum);
495495 if(parts == null) continue;
496496 String host = parts[0].trim();
497497 String path = parts[1].trim();
498 -
 498+
499499 if(indexRsyncPath.get(host)!=null && verbose)
500500 System.out.println("Warning: repeated path definition for host "+host+" on line "+lineNum+", overwriting old.");
501501 indexRsyncPath.put(host,path);
@@ -502,24 +502,24 @@
503503 String[] parts = splitBySemicolon(line,lineNum);
504504 if(parts == null) continue;
505505 String prefix = parts[0].trim();
506 - String filter = parts[1].trim();
507 -
 506+ String filter = parts[1].trim();
 507+
508508 if(filter.equalsIgnoreCase("<all>"))
509509 namespacePrefixAll = prefix;
510510 else
511 - namespacePrefix.put(prefix,new NamespaceFilter(filter));
 511+ namespacePrefix.put(prefix,new NamespaceFilter(filter));
512512 } else if(section == Section.OAI){
513513 String[] parts = splitBySemicolon(line,lineNum);
514514 if(parts == null) continue;
515515 String suffix = parts[0].trim();
516 - String url = parts[1].trim();
517 -
 516+ String url = parts[1].trim();
 517+
518518 oaiRepo.put(suffix,url);
519519 } else if(section == Section.NAMESPACE_BOOST){
520520 String[] parts = splitBySemicolon(line,lineNum);
521 - if(parts == null) continue;
 521+ if(parts == null) continue;
522522 String[] dbs = parts[0].split(",");
523 - for(int i=0;i<dbs.length;i++)
 523+ for(int i=0;i<dbs.length;i++)
524524 dbs[i]=dbs[i].trim();
525525 processNamespaceBoost(dbs,parts[1]);
526526 }
@@ -528,7 +528,7 @@
529529 in.close();
530530 System.exit(1);
531531 }
532 -
 532+
533533 makeIndexIdPool();
534534 if(useSmartInterwiki){
535535 for(IndexId iid : indexIdPool.values()){
@@ -538,17 +538,17 @@
539539 for(String suffix : databaseSuffixes) {
540540 if(dbname.endsWith(suffix))
541541 smartInterwikiCodes.add(dbname.substring(0, dbname.length() - suffix.length()).toLowerCase());
542 - }
 542+ }
543543 }
544544 }
545545 }
546546 in.close();
547547 }
548 -
 548+
549549 protected void processNamespaceBoost(String[] dbs, String def) {
550550 Pattern mapRegexp = Pattern.compile("\\((.*?)\\)");
551551 HashMap<Integer,Float> map = new HashMap<Integer,Float>();
552 -
 552+
553553 Matcher matcher = mapRegexp.matcher(def);
554554 while(matcher.find()){
555555 String[] parts = matcher.group(1).split(",");
@@ -565,7 +565,7 @@
566566 * A bit hackish: read InitialiseSettings which we know have a certain
567567 * format to avoid maintaining two copies for config files (one in php
568568 * other for lsearch in global conf)
569 - *
 569+ *
570570 * @param initset
571571 */
572572 protected void initializeWmfSettings(String initset) {
@@ -580,7 +580,7 @@
581581 Localization.readDBLocalizations(text);
582582 } catch (Exception e) {
583583 System.out.println("Error reading InitialiseSettings.php from url "+initset+" : "+e.getMessage());
584 - }
 584+ }
585585 }
586586
587587 /** Get all hosts which search this indexId (dbrole) */
@@ -608,7 +608,7 @@
609609 }
610610 return searchHosts;
611611 }
612 -
 612+
613613 /** Get all hosts that search this dbname within current hosts search groups */
614614 protected HashSet<String> getMySearchHosts(String dbname, String dbrole){
615615 HashSet<String> searchHosts = new HashSet<String>();
@@ -617,16 +617,16 @@
618618 group = databaseHostGroup.get(dbname+"@"+hostName);
619619 if(group == null)
620620 return searchHosts;
621 -
 621+
622622 for(String host : searchGroup.get(group).keySet()){
623623 if(search.get(host).contains(dbrole))
624624 searchHosts.add(host);
625625 }
626626 return searchHosts;
627627 }
628 -
 628+
629629 /**
630 - * Call after all data is read from config file, make indexIds for all the
 630+ * Call after all data is read from config file, make indexIds for all the
631631 * indexes in the system
632632 *
633633 */
@@ -637,7 +637,7 @@
638638 localIndexes = new ArrayList<String>();
639639 if(index.get(hostName)!=null)
640640 localIndexes.addAll(index.get(hostName)); */
641 -
 641+
642642 // dbname -> ts index, e.g. enwiki -> en-titles.tspart1
643643 HashMap<String,String> dbnameTitlesPart = new HashMap<String,String>();
644644 // dbname -> matched suffix, e.f. enwiki -> wiki
@@ -670,7 +670,7 @@
671671 // process the general pattern
672672 if(database.containsKey("<all>")){
673673 Hashtable<String,Hashtable<String,String>> allKeyword = database.get("<all>");
674 - splitFactor = Integer.parseInt(allKeyword.get("titles_by_suffix").get("number"));
 674+ splitFactor = Integer.parseInt(allKeyword.get("titles_by_suffix").get("number"));
675675 for(int i=1;i<=splitFactor;i++){
676676 String part = "tspart"+i;
677677 suffixIwMap.putAll(allKeyword.get(part));
@@ -705,17 +705,17 @@
706706 code.put("code",dbLang.get(db));
707707 p.put("language",code);
708708 }
709 - database.put(db+"-titles",p);
 709+ database.put(db+"-titles",p);
710710 }
711711 }
712 -
 712+
713713 // iterate over all dbs and types
714714 for(String dbname : database.keySet()){
715715 if(dbname.startsWith("<"))
716716 continue; // keyword special case
717717 for(String typeid : database.get(dbname).keySet()){
718718 String type = "";
719 - String dbrole = "";
 719+ String dbrole = "";
720720 if(typeid.equals("single") || typeid.equals("mainsplit") || typeid.equals("split") || typeid.equals("nssplit")){
721721 type = typeid;
722722 dbrole = dbname;
@@ -742,20 +742,20 @@
743743 dbrole = dbname + "." + typeid;
744744 } else
745745 continue; // uknown type, skip
746 -
 746+
747747 HashSet<String> searchHosts = getSearchHosts(dbrole);
748748 // FIXME: grouping is largely broken and not really that useful
749749 // HashSet<String> mySearchHosts = getMySearchHosts(dbname,dbrole);
750750 HashSet<String> mySearchHosts = searchHosts;
751751 boolean mySearch = searchHosts.contains(hostAddr) || searchHosts.contains(hostName);
752752 String indexHost = getIndexHost(dbrole,dbname);
753 - boolean myIndex = isMyHost(indexHost);
 753+ boolean myIndex = isMyHost(indexHost);
754754 Hashtable<String,String> typeidParams = database.get(dbname).get(typeid);
755755 boolean isSubdivided = false;
756756 if(typeidParams != null && typeidParams.containsKey("subdivisions")){
757757 isSubdivided = true;
758758 }
759 -
 759+
760760 String rsyncIndexPath = "/";
761761 // if the index is on the local computer search for it
762762 // using both ip address and host name
@@ -764,14 +764,14 @@
765765 if(rsyncIndexPath == null)
766766 rsyncIndexPath = indexRsyncPath.get(hostName);
767767 if(rsyncIndexPath == null)
768 - rsyncIndexPath = indexRsyncPath.get("<default>");
 768+ rsyncIndexPath = indexRsyncPath.get("<default>");
769769 } else{
770770 rsyncIndexPath = indexRsyncPath.get(indexHost);
771771 if(rsyncIndexPath == null)
772772 rsyncIndexPath = indexRsyncPath.get("<default>");
773773 }
774774 String oairepo = getOAIRepo(dbname);
775 -
 775+
776776 IndexId iid = new IndexId(dbrole,
777777 type,
778778 indexHost,
@@ -789,8 +789,8 @@
790790 dbnameSuffix.get(dbname),
791791 getSuffixToDBMap(dbrole,dbnameTitlesPart,dbnameSuffix));
792792 indexIdPool.put(dbrole,iid);
793 -
794 - // add precursor indexes
 793+
 794+ // add precursor indexes
795795 if(type.equals("spell") || type.equals("prefix")){
796796 iid = new IndexId(dbrole+".pre",
797797 "precursor",
@@ -811,7 +811,7 @@
812812 indexIdPool.put(dbrole+".pre",iid);
813813 }
814814 // add highlight indexes
815 - if(type.equals("single") || type.equals("mainsplit") || type.equals("split")
 815+ if(type.equals("single") || type.equals("mainsplit") || type.equals("split")
816816 || type.equals("nssplit") || type.equals("subdivided")){
817817 dbrole+=".hl";
818818 searchHosts = getSearchHosts(dbrole);
@@ -837,15 +837,15 @@
838838 getSuffixToDBMap(dbrole,dbnameTitlesPart,dbnameSuffix));
839839 indexIdPool.put(dbrole,iid);
840840 }
841 -
 841+
842842 }
843843 if(indexIdPool.get(dbname).isNssplit()){
844844 indexIdPool.get(dbname).rebuildNsMap(indexIdPool);
845845 indexIdPool.get(dbname+".hl").rebuildNsMap(indexIdPool);
846846 }
847 - }
 847+ }
848848 }
849 -
 849+
850850 private String getIndexHost(String dbrole, String dbname){
851851 String indexHost = indexLocation.get(dbrole);
852852 if(indexHost == null)
@@ -880,7 +880,7 @@
881881 return map;
882882 }
883883
884 - /**
 884+ /**
885885 * Return the IndexId object given it's id string representation
886886 * @param dbrole
887887 * @return
@@ -888,11 +888,11 @@
889889 static public IndexId getIndexId(String dbrole){
890890 return indexIdPool.get(dbrole);
891891 }
892 -
 892+
893893 /**
894894 * Substitute {url} with content of url
895895 * where url points to list of db's one in each line
896 - *
 896+ *
897897 * @param line
898898 * @return processed line
899899 */
@@ -918,9 +918,9 @@
919919 text.append(l);
920920 }
921921 in.close();
922 -
 922+
923923 String dbs = text.toString();
924 -
 924+
925925 matcher.appendReplacement(replaced,dbs);
926926 } catch (MalformedURLException e) {
927927 System.out.println("Error in global config file: URL "+matcher.group(1)+" is a malfomed URL");
@@ -929,7 +929,7 @@
930930 }
931931 }
932932 matcher.appendTail(replaced);
933 -
 933+
934934 return replaced.toString();
935935
936936 }
@@ -942,7 +942,7 @@
943943 }
944944 return parts;
945945 }
946 -
 946+
947947 /**
948948 * Process a list: db.role, db1.role2... put this list into
949949 * index Hashtable, and make a reverse Hashtable indexLocation
@@ -951,13 +951,13 @@
952952 */
953953 protected void processIndexRoles(String host, String roles){
954954 String[] dbroles = roles.split("[, ]+");
955 -
 955+
956956 ArrayList<String> hostroles = index.get(host);
957957 if(hostroles == null){
958958 hostroles = new ArrayList<String>();
959959 index.put(host,hostroles);
960960 }
961 - for(String dbrole : dbroles){
 961+ for(String dbrole : dbroles){
962962 dbrole = dbrole.trim();
963963 if(dbrole.length()==0)
964964 continue;
@@ -970,12 +970,12 @@
971971 if(dbrole.contains("*"))
972972 indexWildcard.put(StringUtils.makeRegexp(dbrole),host);
973973 else
974 - indexLocation.put(dbrole,host);
 974+ indexLocation.put(dbrole,host);
975975 }
976976 }
977 -
 977+
978978 /**
979 - * Process a list: db.role, db1.role2... and put them into
 979+ * Process a list: db.role, db1.role2... and put them into
980980 * search and searchGroup hashtables
981981 * @param host
982982 * @param roles
@@ -983,19 +983,19 @@
984984 */
985985 protected void processSearchRoles(String host, String roles, int groupNum){
986986 String[] dbroles = roles.split("[, ]+");
987 -
 987+
988988 // the cummulative search hastable
989989 ArrayList<String> hostroles = search.get(host);
990990 if(hostroles == null){
991991 hostroles = new ArrayList<String>();
992992 search.put(host,hostroles);
993993 }
994 -
995 - // wildcards that if match will
 994+
 995+ // wildcards that if match will
996996 ArrayList<Pattern> hostwildcards = searchWildcard.get(host);
997997 if(hostwildcards == null)
998998 searchWildcard.put(host,hostwildcards = new ArrayList<Pattern>());
999 -
 999+
10001000 // process groups
10011001 Integer grp = new Integer(groupNum);
10021002 Hashtable<String,ArrayList<String>> grouphosts = searchGroup.get(grp);
@@ -1003,13 +1003,13 @@
10041004 grouphosts = new Hashtable<String,ArrayList<String>>();
10051005 searchGroup.put(grp,grouphosts);
10061006 }
1007 -
 1007+
10081008 ArrayList<String> grouproles = grouphosts.get(host);
10091009 if(grouproles == null){
10101010 grouproles = new ArrayList<String>();
10111011 grouphosts.put(host,grouproles);
10121012 }
1013 -
 1013+
10141014 // add to both lists
10151015 for(String dbrole : dbroles){
10161016 dbrole = dbrole.trim();
@@ -1033,24 +1033,24 @@
10341034 grouproles.add(dbrole);
10351035 }
10361036 }
1037 -
 1037+
10381038 /**
10391039 * Process roles in format type,param1,param2... and put
10401040 * them into the database Hashtable
1041 - *
 1041+ *
10421042 * @param db name of the db to which the role belongs
10431043 * @param role role string in above format
10441044 */
10451045 protected void processDBRole(String[] dbs, String role){
10461046 String[] tokens = role.split(",");
10471047 String type = tokens[0].trim().toLowerCase();
1048 -
 1048+
10491049 Hashtable<String,Hashtable<String,String>> dbroles = new Hashtable<String,Hashtable<String,String>>();
10501050 Hashtable<String,String> params = new Hashtable<String,String>();
1051 -
 1051+
10521052 if(type.equals("single") || type.equals("mainpart") ||
1053 - type.equals("restpart") || type.matches("part[1-9][0-9]*")){
1054 -
 1053+ type.equals("restpart") || type.matches("part[1-9][0-9]*")){
 1054+
10551055 // all params are optional, if absent default will be used
10561056 if(tokens.length>1){
10571057 String token = tokens[1].trim().toLowerCase();
@@ -1067,12 +1067,12 @@
10681068 params.put("maxBufDocs", tokens[3]);
10691069 if(tokens.length>4)
10701070 params.put("subdivisions", tokens[4]);
1071 -
 1071+
10721072 if(tokens.length>5 && verbose)
10731073 System.out.println("Unrecognized database parameters in ("+role+")");
1074 -
 1074+
10751075 dbroles.put(type,params);
1076 -
 1076+
10771077 } else if(type.equals("mainsplit")){
10781078 // currently no params
10791079 dbroles.put(type,params);
@@ -1089,33 +1089,33 @@
10901090 // langauge is optional, or maybe it shouldn't be
10911091 if(tokens.length>1)
10921092 params.put("code",tokens[1]);
1093 -
 1093+
10941094 if(tokens.length>2 && verbose)
10951095 System.out.println("Unrecognized language parameters in ("+role+")");
1096 -
 1096+
10971097 dbroles.put(type,params);
1098 -
 1098+
10991099 } else if(type.startsWith("warmup")){
11001100 // number of warmup queries
11011101 if(tokens.length>1)
11021102 params.put("count",tokens[1]);
1103 -
 1103+
11041104 if(tokens.length>2 && verbose)
11051105 System.out.println("Unrecognized warmup parameters in ("+role+")");
1106 -
 1106+
11071107 dbroles.put(type,params);
1108 -
 1108+
11091109 } else if(type.matches("nspart[1-9][0-9]*")){
11101110 // [0,1,2] syntax gets split up in first split, retokenize
11111111 String ns = role.substring(role.indexOf(",")+1,role.lastIndexOf("]")+1).trim();
11121112 tokens = role.substring(role.lastIndexOf("]")+1).split(",");
1113 - // definition of namespaces, e.g. [0,1,2]
 1113+ // definition of namespaces, e.g. [0,1,2]
11141114 if(ns.length() > 2 && ns.startsWith("[") && ns.endsWith("]"))
11151115 ns = ns.substring(1,ns.length()-1);
11161116 else
11171117 ns = "<default>";
11181118 params.put("namespaces",ns);
1119 -
 1119+
11201120 // all params are optional, if absent default will be used
11211121 if(tokens.length>1){
11221122 String token = tokens[1].trim().toLowerCase();
@@ -1132,35 +1132,35 @@
11331133 params.put("maxBufDocs", tokens[3]);
11341134 if(tokens.length>4)
11351135 params.put("subdivisions", tokens[4]);
1136 -
 1136+
11371137 if(tokens.length>5 && verbose)
11381138 System.out.println("Unrecognized database parameters in ("+role+")");
1139 -
 1139+
11401140 dbroles.put(type,params);
1141 -
1142 - } else if(type.equals("spell")){
 1141+
 1142+ } else if(type.equals("spell")){
11431143 // all params are optional, if absent default will be used
11441144 if(tokens.length>1)
11451145 params.put("wordsMinFreq",tokens[1]);
11461146 if(tokens.length>2)
11471147 params.put("phrasesMinFreq",tokens[2]);
1148 -
 1148+
11491149 if(tokens.length>3 && verbose)
11501150 System.out.println("Unrecognized suggest parameters in ("+role+")");
1151 -
 1151+
11521152 dbroles.put(type,params);
11531153 } else if(type.equals("title_ngram")){
11541154 // no params
11551155 if(tokens.length>1 && verbose)
11561156 System.out.println("Unrecognized title_ngram parameters in ("+role+")");
1157 -
 1157+
11581158 dbroles.put(type,params);
11591159 } else if(type.equals("prefix")){
11601160 // no params
11611161 if(tokens.length>1 && verbose)
11621162 System.out.println("Unrecognized prefix parameters in ("+role+")");
1163 -
1164 - dbroles.put(type,params);
 1163+
 1164+ dbroles.put(type,params);
11651165 } else if(type.equals("titles_by_suffix") || type.equals("titles_grouped")){
11661166 if(tokens.length>1) // number of segments
11671167 params.put("number",tokens[1]);
@@ -1182,20 +1182,20 @@
11831183 else
11841184 iw = suffix;
11851185 params.put(suffix,iw);
1186 - }
1187 - dbroles.put(type,params);
 1186+ }
 1187+ dbroles.put(type,params);
11881188 } else if(verbose){
11891189 System.out.println("Warning: Unrecognized role \""+role+"\".Ignoring.");
11901190 }
1191 -
1192 -
 1191+
 1192+
11931193 // add dbroles to all given dbs
11941194 for(int i=0;i<dbs.length;i++){
11951195 String db = dbs[i];
1196 -
 1196+
11971197 Hashtable<String, Hashtable<String, String>> dbr = database.get(db);
11981198 if(dbr == null){
1199 - dbr = new Hashtable<String, Hashtable<String, String>>();
 1199+ dbr = new Hashtable<String, Hashtable<String, String>>();
12001200 database.put(db,dbr);
12011201 }
12021202 if(type.equals("split") || type.equals("mainsplit") || type.equals("single") || type.equals("nssplit")){
@@ -1215,7 +1215,7 @@
12161216 * Returns if host should do some indexing
12171217 * @return true if this node is indexer
12181218 */
1219 - public boolean isIndexer(){
 1219+ public boolean isIndexer(){
12201220 return index.get(hostAddr)!=null || index.get(hostName)!=null;
12211221 }
12221222
@@ -1226,20 +1226,20 @@
12271227 public boolean isSearcher() {
12281228 return search.get(hostAddr)!=null || search.get(hostName)!=null;
12291229 }
1230 -
 1230+
12311231 /**
1232 - * Returns parameters of database, i.e. language, warmup ...
1233 - *
 1232+ * Returns parameters of database, i.e. language, warmup ...
 1233+ *
12341234 * @param dbname
12351235 * @return Hashtable of parameters for dbname.type
12361236 */
12371237 public Hashtable<String,String> getDBParams(String dbname, String type){
12381238 return database.get(dbname).get(type);
12391239 }
1240 -
1241 - /**
1242 - * Get integer parameter for dbname.type
1243 - * Returns defaultValue if the param is not defined
 1240+
 1241+ /**
 1242+ * Get integer parameter for dbname.type
 1243+ * Returns defaultValue if the param is not defined
12441244 */
12451245 public int getIntDBParam(String dbname, String type, String param, int defaultValue){
12461246 Hashtable<String,String> p = database.get(dbname).get(type);
@@ -1249,10 +1249,10 @@
12501250 else
12511251 return Integer.parseInt(val);
12521252 }
1253 -
1254 - /**
1255 - * Get string parameter for dbname.type
1256 - * Returns defaultValue if the param is not defined
 1253+
 1254+ /**
 1255+ * Get string parameter for dbname.type
 1256+ * Returns defaultValue if the param is not defined
12571257 */
12581258 public String getStringDBParam(String dbname, String type, String param, String defaultValue){
12591259 Hashtable<String,String> p = database.get(dbname).get(type);
@@ -1262,11 +1262,11 @@
12631263 else
12641264 return val;
12651265 }
1266 -
 1266+
12671267 /**
12681268 * Look at the logical DB structure ([Database] in global config)
1269 - * and figure out the main index type of database (e.g. single,
1270 - * mainpart, split).
 1269+ * and figure out the main index type of database (e.g. single,
 1270+ * mainpart, split).
12711271 * @param dbname
12721272 * @return lowercased db type
12731273 */
@@ -1276,10 +1276,10 @@
12771277 String type = (String)e.nextElement();
12781278 if(type.equals("single") || type.equals("mainsplit") || type.equals("split"))
12791279 return type;
1280 - }
 1280+ }
12811281 // global configuration consistency error
12821282 System.out.println("Database "+dbname+" does not have a specified type (eg single, mainsplit, split).");
1283 - return "unknown";
 1283+ return "unknown";
12841284 }
12851285
12861286 /**
@@ -1289,7 +1289,7 @@
12901290 public boolean isMyHost(String host) {
12911291 return host.equalsIgnoreCase(hostAddr) || host.equalsIgnoreCase(hostName);
12921292 }
1293 -
 1293+
12941294 /** Get language for a dbname */
12951295 public String getLanguage(String dbname) {
12961296 // first check explicit language paramter in global settings
@@ -1310,40 +1310,40 @@
13111311 for (String suffix : databaseSuffixes) {
13121312 if (dbname.endsWith(suffix))
13131313 return dbname.substring(0, dbname.length() - suffix.length());
1314 - }
 1314+ }
13151315 }
1316 -
 1316+
13171317 return "";
13181318 }
1319 -
 1319+
13201320 /** All indexes that localhost is indexing */
13211321 public HashSet<IndexId> getMyIndex(){
13221322 HashSet<IndexId> ret = new HashSet<IndexId>();
1323 -
 1323+
13241324 for(IndexId iid : indexIdPool.values()){
13251325 if(iid.isMyIndex())
13261326 ret.add(iid);
13271327 }
1328 -
 1328+
13291329 return ret;
13301330 }
1331 -
 1331+
13321332 /** All indexed that localhost is searching */
13331333 public HashSet<IndexId> getMySearch(){
13341334 HashSet<IndexId> ret = new HashSet<IndexId>();
1335 -
 1335+
13361336 for(IndexId iid : indexIdPool.values()){
13371337 if(iid.isMySearch())
13381338 ret.add(iid);
13391339 }
1340 -
 1340+
13411341 return ret;
13421342 }
13431343 /** Get all dbnames that are locally indexed */
13441344 public ArrayList<String> getMyIndexDBnames(){
13451345 HashSet<String> dbnames = new HashSet<String>();
13461346 ArrayList<String> dbnamesSorted = new ArrayList<String>();
1347 -
 1347+
13481348 for(IndexId iid : indexIdPool.values()){
13491349 if(iid.isMyIndex() && !iid.isTitlesBySuffix() && !iid.isSpell())
13501350 dbnames.add(iid.getDBname().toString());
@@ -1352,7 +1352,7 @@
13531353 Collections.sort(dbnamesSorted);
13541354 return dbnamesSorted;
13551355 }
1356 -
 1356+
13571357 /** Get the name of the localhost as it appears in global configuration */
13581358 public String getLocalhost(){
13591359 if(index.get(hostAddr) != null || search.get(hostAddr) != null)
@@ -1370,7 +1370,7 @@
13711371 public String getNamespacePrefixAll() {
13721372 return namespacePrefixAll;
13731373 }
1374 -
 1374+
13751375 /** Check wether dbname has some of the suffixes */
13761376 protected boolean checkSuffix(String[] suffixes, String dbname){
13771377 if(suffixes == null)
@@ -1383,32 +1383,32 @@
13841384 }
13851385 return false;
13861386 }
1387 -
 1387+
13881388 /** If dbname should have additional rank boost */
13891389 public boolean useAdditionalRank(String dbname){
13901390 return checkSuffix(additionalRank,dbname);
13911391 }
1392 -
 1392+
13931393 /** Returns if keyword scoring should be used for this db, using
13941394 * the suffixes from the global configuration
1395 - *
 1395+ *
13961396 * @param dbname
13971397 * @return
13981398 */
13991399 public boolean useKeywordScoring(String dbname){
1400 - return checkSuffix(keywordScoringSuffixes,dbname);
 1400+ return checkSuffix(keywordScoringSuffixes,dbname);
14011401 }
1402 -
 1402+
14031403 /**
1404 - * If this dbname is assigned an exact-case additional index.
1405 - *
 1404+ * If this dbname is assigned an exact-case additional index.
 1405+ *
14061406 * @param dbname
14071407 * @return
14081408 */
14091409 public boolean exactCaseIndex(String dbname){
1410 - return checkSuffix(exactCaseSuffix,dbname);
 1410+ return checkSuffix(exactCaseSuffix,dbname);
14111411 }
1412 -
 1412+
14131413 /** Find (longest) suffix that matches dbname */
14141414 public String findSuffix(String[] suffixes, String dbname){
14151415 if(suffixes == null)
@@ -1438,7 +1438,7 @@
14391439 return ret;
14401440 return null;
14411441 }
1442 -
 1442+
14431443 public AgeScaling getAgeScaling(String dbname){
14441444 String strong = findSuffix(ageScalingStrong,dbname);
14451445 String medium = findSuffix(ageScalingMedium,dbname);
@@ -1452,10 +1452,10 @@
14531453 return AgeScaling.MEDIUM;
14541454 if(w>=s && s>=m)
14551455 return AgeScaling.WEAK;
1456 -
 1456+
14571457 return AgeScaling.NONE;
14581458 }
1459 -
 1459+
14601460 /** Get OAI-repo url for dbname */
14611461 public String getOAIRepo(String dbname){
14621462 String repo = null;
@@ -1474,7 +1474,7 @@
14751475 repo += "/";
14761476 repo += "w/index.php"; // FIXME: we take this as generic path to index.php
14771477 }
1478 -
 1478+
14791479 }
14801480 // get from global config
14811481 if(repo == null){
@@ -1485,13 +1485,13 @@
14861486 repo = oaiRepo.get("<default>");
14871487 }
14881488 if(repo == null)
1489 - return ""; // failed, no url
1490 -
 1489+ return ""; // failed, no url
 1490+
14911491 // process $lang
14921492 String lang = getLanguage(dbname);
14931493 repo = repo.replace("$lang",lang.replace('_','-'));
14941494 repo = repo += "?title=Special:OAIRepository";
1495 -
 1495+
14961496 return repo;
14971497 }
14981498
@@ -1502,7 +1502,7 @@
15031503 public static void setVerbose(boolean verbose) {
15041504 GlobalConfiguration.verbose = verbose;
15051505 }
1506 -
 1506+
15071507 public NamespaceFilter getDefaultNamespace(IndexId iid){
15081508 return getDefaultNamespace(iid.getDBname());
15091509 }
@@ -1515,7 +1515,7 @@
15161516 }
15171517 return new NamespaceFilter(0);
15181518 }
1519 -
 1519+
15201520 public NamespaceFilter getContentNamespaces(IndexId iid){
15211521 return getContentNamespaces(iid.getDBname());
15221522 }
@@ -1528,7 +1528,7 @@
15291529 }
15301530 return new NamespaceFilter(0);
15311531 }
1532 -
 1532+
15331533 public NamespaceFilter getNamespacesWithSubpages(String dbname){
15341534 if(wgNamespacesWithSubpages != null){
15351535 if(wgNamespacesWithSubpages.containsKey(dbname))
@@ -1538,7 +1538,7 @@
15391539 }
15401540 return new NamespaceFilter(2);
15411541 }
1542 -
 1542+
15431543 public ArticleNamespaceScaling getNamespaceScaling(String dbname){
15441544 if(namespaceBoost.containsKey(dbname))
15451545 return namespaceBoost.get(dbname);
@@ -1547,19 +1547,19 @@
15481548 else
15491549 return new ArticleNamespaceScaling(new HashMap<Integer,Float>());
15501550 }
1551 -
 1551+
15521552 public HashSet<String> getSmartInterwikiCodes() {
15531553 return smartInterwikiCodes;
15541554 }
1555 -
 1555+
15561556 public boolean hasCommonsWiki(){
15571557 return commonsWiki != null;
15581558 }
1559 -
 1559+
15601560 public IndexId getCommonsWiki(){
15611561 return IndexId.get(commonsWiki);
15621562 }
1563 -
 1563+
15641564 /** Get all searchers (NOTE: this is kindof slow...) */
15651565 public HashSet<String> getAllSearchHosts(){
15661566 HashSet<String> hosts = new HashSet<String>();
@@ -1577,11 +1577,11 @@
15781578 public int getMaxSearchOffset() {
15791579 return maxSearchOffset;
15801580 }
1581 -
 1581+
15821582 public String getIndexPath(){
15831583 return indexPath;
15841584 }
15851585
1586 -
15871586
 1587+
15881588 }
\ No newline at end of file
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/search/NamespaceFilter.java
@@ -1,24 +1,24 @@
22 /*
33 * Copyright 2005 Brion Vibber
4 - *
5 - * Permission is hereby granted, free of charge, to any person obtaining a copy
 4+ *
 5+ * Permission is hereby granted, free of charge, to any person obtaining a copy
66 * of this software and associated documentation files (the "Software"), to deal
7 - * in the Software without restriction, including without limitation the rights
8 - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 - * copies of the Software, and to permit persons to whom the Software is
 7+ * in the Software without restriction, including without limitation the rights
 8+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9+ * copies of the Software, and to permit persons to whom the Software is
1010 * furnished to do so, subject to the following conditions:
1111 *
12 - * The above copyright notice and this permission notice shall be included in
 12+ * The above copyright notice and this permission notice shall be included in
1313 * all copies or substantial portions of the Software.
1414 *
15 - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 15+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1919 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121 * SOFTWARE.
22 - *
 22+ *
2323 * $Id: NamespaceFilter.java 8398 2005-04-17 06:21:19Z vibber $
2424 */
2525
@@ -34,16 +34,16 @@
3535 * to filter */
3636 public class NamespaceFilter implements Serializable {
3737 private BitSet included;
38 -
 38+
3939 protected void init(){
4040 included = new BitSet(64);
4141 }
42 -
 42+
4343 /** "all" filter */
4444 public NamespaceFilter() {
4545 init();
4646 }
47 -
 47+
4848 /** filter namespaces */
4949 public NamespaceFilter(Collection<Integer> namespaces){
5050 init();
@@ -66,7 +66,7 @@
6767 }
6868 }
6969 }
70 -
 70+
7171 /** Decompose this filter into an array of single-namespace filters, do OR to construct */
7272 public ArrayList<NamespaceFilter> decompose(){
7373 ArrayList<NamespaceFilter> dec = new ArrayList<NamespaceFilter>();
@@ -75,7 +75,7 @@
7676 }
7777 return dec;
7878 }
79 -
 79+
8080 public HashSet<Integer> getNamespaces(){
8181 HashSet<Integer> ret = new HashSet<Integer>();
8282 if(included.cardinality() == 0)
@@ -85,7 +85,7 @@
8686 }
8787 return ret;
8888 }
89 -
 89+
9090 public ArrayList<Integer> getNamespacesOrdered(){
9191 ArrayList<Integer> ret = new ArrayList<Integer>();
9292 if(included.cardinality() == 0)
@@ -95,36 +95,36 @@
9696 }
9797 return ret;
9898 }
99 -
 99+
100100 public boolean filter(String namespace) {
101101 return filter(Integer.parseInt(namespace));
102102 }
103 -
 103+
104104 public boolean filter(int namespace) {
105105 return included.get(namespace);
106106 }
107 -
 107+
108108 /** Set bit for namespace to true */
109109 public void set(int namespace){
110110 included.set(namespace);
111111 }
112 -
 112+
113113 /** Set bit for namespace to false */
114114 public void unset(int namespace){
115115 included.set(namespace,false);
116116 }
117 -
 117+
118118 public boolean contains(int namespace){
119119 if(namespace < 0)
120120 return false;
121121 else
122122 return included.get(namespace);
123123 }
124 -
 124+
125125 public boolean contains(String namespace){
126126 return contains(Integer.parseInt(namespace));
127127 }
128 -
 128+
129129 public BitSet getIncluded() {
130130 return included;
131131 }
@@ -132,16 +132,16 @@
133133 public int cardinality(){
134134 return included.cardinality();
135135 }
136 -
 136+
137137 public int getNamespace(){
138 - return included.nextSetBit(0);
 138+ return included.nextSetBit(0);
139139 }
140 -
 140+
141141 /** if empty filter ("all" keyword") */
142142 public boolean isAll(){
143 - return cardinality() == 0;
 143+ return cardinality() == 0;
144144 }
145 -
 145+
146146 @Override
147147 public String toString() {
148148 return included.toString();
@@ -173,5 +173,5 @@
174174 }
175175
176176
177 -
 177+
178178 }
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java
@@ -13,11 +13,11 @@
1414 import org.wikimedia.lsearch.config.Configuration;
1515 import org.wikimedia.lsearch.index.IndexThread;
1616 /**
17 - * Implements a simplistic unicode decomposer. By default will use
 17+ * Implements a simplistic unicode decomposer. By default will use
1818 * unicode data from lib/UnicodeData.txt. The decomposer attempts
1919 * to decompose every character into compatible letters, for instance
2020 * š will be decomposed to s. Wile &#64257 will be decomposed into f and i.
21 - *
 21+ *
2222 * @author rainman
2323 *
2424 */
@@ -33,13 +33,13 @@
3434 if(len<buffer.length)
3535 buffer[len++] = ch;
3636 }
37 -
 37+
3838 }
3939 static org.apache.log4j.Logger log = Logger.getLogger(UnicodeDecomposer.class);
4040 final protected static char[][] decomposition = new char[65536][];
4141 final protected static boolean[] combining = new boolean[65536];
4242 protected static UnicodeDecomposer instance = null;
43 -
 43+
4444 /**
4545 * Get decomposing <b>letter</b> characters
4646 * @param ch
@@ -48,17 +48,17 @@
4949 public char[] decompose(char ch){
5050 return decomposition[ch];
5151 }
52 -
 52+
5353 protected UnicodeDecomposer(String resource){
5454 initFromResource(resource);
5555 log.debug("Loaded unicode decomposer");
5656 }
57 -
58 -
 57+
 58+
5959 public boolean isCombiningChar(char ch){
6060 return combining[ch];
6161 }
62 -
 62+
6363 /**
6464 * Get singleton instance of the Unicode decomposer class.
6565 * Loads lib/UnicodeData.txt on first call
@@ -68,10 +68,10 @@
6969 if(instance == null){
7070 instance = new UnicodeDecomposer("/UnicodeData.txt");
7171 }
72 -
 72+
7373 return instance;
7474 }
75 -
 75+
7676 protected void initFromResource(String resource){
7777 BitSet letters = new BitSet(65536);
7878 try {
@@ -89,30 +89,30 @@
9090 continue; // ignore any additional chars
9191 if(parts[2].charAt(0) == 'L')
9292 letters.set(chVal);
93 -
 93+
9494 if(parts[2].charAt(0) == 'M')
9595 combining[chVal] = true;
9696 else
9797 combining[chVal] = false;
9898 }
9999 in.close();
100 -
 100+
101101 // add some exception requested by users
102102 // yiddish stuffs
103103 combining[0x05B7] = true;
104104 combining[0x05B8] = true;
105105 combining[0x05BC] = true;
106106 combining[0x05BF] = true;
107 -
 107+
108108 // decomposition table
109109 char[][] table = new char[65536][];
110 -
 110+
111111 // default for all chars: no decomposition
112112 for(int ich = 0; ich <= 0xFFFF; ich++){
113113 decomposition[ich]=null;
114114 table[ich]=null;
115115 }
116 -
 116+
117117 // second pass, make the decomposition table
118118 in = new BufferedReader(new InputStreamReader(UnicodeDecomposer.class.getResourceAsStream(resource)));
119119 while((line = in.readLine()) != null){
@@ -138,32 +138,32 @@
139139 table[ch]= new char[len];
140140 for(i=0;i<len;i++)
141141 table[ch][i] = buf[i];
142 - }
143 - }
 142+ }
 143+ }
144144 }
145 -
 145+
146146 // some decomposition exceptions
147147 // yiddish stuffs
148148 table[0x05F0] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE VAV
149149 table[0x05F0][0] = 0x05D5;
150150 table[0x05F0][1] = 0x05D5;
151 -
 151+
152152 table[0x05F1] = new char[2]; // HEBREW LIGATURE YIDDISH VAV YOD
153153 table[0x05F1][0] = 0x05D5;
154154 table[0x05F1][1] = 0x05D9;
155 -
 155+
156156 table[0x05F2] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE YOD
157157 table[0x05F2][0] = 0x05D9;
158158 table[0x05F2][1] = 0x05D9;
159 -
 159+
160160 table[0xFB1F] = new char[2]; // HEBREW LIGATURE YIDDISH YOD YOD PATAH
161161 table[0xFB1F][0] = 0x05D9;
162162 table[0xFB1F][1] = 0x05D9;
163 -
 163+
164164 table[0xFB1D] = new char[1]; // HEBREW LETTER YOD WITH HIRIQ
165165 table[0xFB1D][0] = 0x05D9;
166 -
167 -
 166+
 167+
168168 // using decomposition table recursively decompose characters
169169 for(int ich = 0; ich <= 0xFFFF; ich++){
170170 if(table[ich]==null)
@@ -174,7 +174,7 @@
175175 decomposition[ich]= new char[buffer.len];
176176 for(i=0;i<buffer.len;i++)
177177 decomposition[ich][i] = buffer.buffer[i];
178 - }
 178+ }
179179 }
180180 in.close();
181181 } catch (IOException e) {
@@ -188,7 +188,7 @@
189189
190190 /**
191191 * Depth-first recursion, gradually decompose characters (if it has many diacritics)
192 - *
 192+ *
193193 * @param buf - buffer where to write resulting decompositions
194194 * @param table - mapping char -> decomposing letters
195195 * @param letters - bitset of letter characters

Status & tagging log