Index: trunk/lucene-search-2/test/org/wikimedia/lsearch/config/GlobalConfigurationTest.java |
— | — | @@ -44,7 +44,7 @@ |
45 | 45 | |
46 | 46 | public void testReadURL(){ |
47 | 47 | // database |
48 | | - Hashtable database = global.database; |
| 48 | + Hashtable database = global.database; |
49 | 49 | Hashtable roles = (Hashtable) database.get("entest"); |
50 | 50 | assertNotNull(roles.get("mainsplit")); |
51 | 51 | assertNotNull(roles.get("mainpart")); |
— | — | @@ -67,7 +67,7 @@ |
68 | 68 | |
69 | 69 | // search |
70 | 70 | Hashtable search = global.search; |
71 | | - ArrayList sr = (ArrayList) search.get("192.168.0.2"); |
| 71 | + ArrayList sr = (ArrayList) search.get("192.168.0.2"); |
72 | 72 | |
73 | 73 | String[] ssr = (String[]) sr.toArray(new String [] {} ); |
74 | 74 | |
— | — | @@ -82,12 +82,12 @@ |
83 | 83 | Hashtable<String,ArrayList<String>> g0 = sg.get(new Integer(0)); |
84 | 84 | assertEquals("{192.168.0.5=[entest.mainpart, entest.restpart], 192.168.0.2=[entest.mainpart]}",g0.toString()); |
85 | 85 | Hashtable<String,ArrayList<String>> g1 = sg.get(new Integer(1)); |
86 | | - assertEquals("{192.168.0.6=[frtest.part3, detest], 192.168.0.4=[frtest.part1, frtest.part2]}",g1.toString()); |
| 86 | + assertEquals("{192.168.0.6=[frtest.part3, detest], 192.168.0.4=[frtest.part1, frtest.part2]}",g1.toString()); |
87 | 87 | |
88 | 88 | |
89 | 89 | // index |
90 | 90 | Hashtable index = global.index; |
91 | | - ArrayList ir = (ArrayList) index.get("192.168.0.5"); |
| 91 | + ArrayList ir = (ArrayList) index.get("192.168.0.5"); |
92 | 92 | |
93 | 93 | String[] sir = (String[]) ir.toArray(new String [] {} ); |
94 | 94 | |
— | — | @@ -115,7 +115,7 @@ |
116 | 116 | String hostName = host.getHostName(); |
117 | 117 | System.out.println("Verify internet IP: "+hostAddr+", and hostname: "+hostName); |
118 | 118 | |
119 | | - // test prefixes |
| 119 | + // test prefixes |
120 | 120 | Hashtable<String,NamespaceFilter> p = global.namespacePrefix; |
121 | 121 | assertEquals(17,p.size()); |
122 | 122 | |
— | — | @@ -298,7 +298,7 @@ |
299 | 299 | //assertTrue(tn.isTitleNgram()); |
300 | 300 | |
301 | 301 | } |
302 | | - |
| 302 | + |
303 | 303 | public void testComplexWildcard(){ |
304 | 304 | Pattern p = Pattern.compile(StringUtils.wildcardToRegexp("(?!(enwiki.|dewiki.|frwiki.|itwiki.|nlwiki|.))*.spell")); |
305 | 305 | assertFalse(p.matcher("enwiki.spell").matches()); |
Index: trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemmer.java |
— | — | @@ -1233,7 +1233,7 @@ |
1234 | 1234 | if (dict_ht == null) |
1235 | 1235 | initializeDictHash(); |
1236 | 1236 | } |
1237 | | - |
| 1237 | + |
1238 | 1238 | public String singular(String term) { |
1239 | 1239 | boolean stemIt; |
1240 | 1240 | String result; |
— | — | @@ -1281,17 +1281,17 @@ |
1282 | 1282 | while (true) { |
1283 | 1283 | entry = wordInDict(); |
1284 | 1284 | if (entry != null) break; |
1285 | | - plural(); |
| 1285 | + plural(); |
1286 | 1286 | break; |
1287 | 1287 | } |
1288 | | - |
| 1288 | + |
1289 | 1289 | if (entry != null) { |
1290 | 1290 | if (entry.root != null) |
1291 | 1291 | result = entry.root; |
1292 | 1292 | else |
1293 | 1293 | result = word.toString(); |
1294 | 1294 | } else result = word.toString(); |
1295 | | - |
| 1295 | + |
1296 | 1296 | /* Enter into cache, at the place not used by the last cache hit */ |
1297 | 1297 | if (stem_ht.size() < MaxCacheSize) { |
1298 | 1298 | /* Add term to cache */ |
Index: trunk/lucene-search-2/src/org/apache/lucene/analysis/KStemFilter.java |
— | — | @@ -102,7 +102,7 @@ |
103 | 103 | return null; |
104 | 104 | else { |
105 | 105 | String s = stemmer.stem(token.termText()); |
106 | | - if (!s.equals(token.termText())) |
| 106 | + if (!s.equals(token.termText())) |
107 | 107 | return new Token(s, token.startOffset, token.endOffset, token.type); |
108 | 108 | return token; |
109 | 109 | } |
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/storage/MySQLStorage.java |
— | — | @@ -26,8 +26,8 @@ |
27 | 27 | |
28 | 28 | /** |
29 | 29 | * MySQL storage backend |
30 | | - * |
31 | | - * |
| 30 | + * |
| 31 | + * |
32 | 32 | * @author rainman |
33 | 33 | * |
34 | 34 | */ |
— | — | @@ -52,10 +52,10 @@ |
53 | 53 | /** db where to put everything, if we are not using one db per dbname */ |
54 | 54 | protected String defaultDB; |
55 | 55 | /** where sql stuff is, e.g. references_table.sql */ |
56 | | - protected String lib; |
57 | | - /** table name -> create table file */ |
| 56 | + protected String lib; |
| 57 | + /** table name -> create table file */ |
58 | 58 | protected Hashtable<String,String> tableDefs = new Hashtable<String,String>(); |
59 | | - |
| 59 | + |
60 | 60 | protected MySQLStorage() { |
61 | 61 | config = Configuration.open(); |
62 | 62 | try { |
— | — | @@ -63,9 +63,9 @@ |
64 | 64 | } catch (ClassNotFoundException e) { |
65 | 65 | log.error("Cannot load mysql jdbc driver, class not found: "+e.getMessage(),e); |
66 | 66 | } |
67 | | - |
| 67 | + |
68 | 68 | lib = config.getString("Storage","lib","./sql"); |
69 | | - |
| 69 | + |
70 | 70 | master = config.getString("Storage","master","localhost"); |
71 | 71 | String[] ss = config.getArray("Storage","slaves"); |
72 | 72 | if(ss != null){ |
— | — | @@ -85,15 +85,15 @@ |
86 | 86 | slaves = new Hashtable<String,Double>(); |
87 | 87 | for(Entry<String,Double> ed : rawslaves.entrySet()) |
88 | 88 | slaves.put(ed.getKey(),ed.getValue()/sum); |
89 | | - |
| 89 | + |
90 | 90 | } |
91 | | - |
| 91 | + |
92 | 92 | username = config.getString("Storage","username","root"); |
93 | 93 | password = config.getString("Storage","password",""); |
94 | | - |
| 94 | + |
95 | 95 | adminUsername = config.getString("Storage","adminuser",username); |
96 | 96 | adminPassword = config.getString("Storage","adminpass",password); |
97 | | - |
| 97 | + |
98 | 98 | // figure out db configuration |
99 | 99 | separate = config.getBoolean("Storage","useSeparateDBs"); |
100 | 100 | if(!separate){ |
— | — | @@ -102,23 +102,23 @@ |
103 | 103 | log.error("Set Storage.defaultDB in local configuration."); |
104 | 104 | } |
105 | 105 | } |
106 | | - } |
107 | | - |
| 106 | + } |
| 107 | + |
108 | 108 | /** Get connection for reading of (possibly lagged) stuff, i.e. on slaves (or master if there are no slaves) */ |
109 | 109 | protected Connection getReadConnection(String dbname) throws IOException{ |
110 | 110 | return openConnection(dbname,false,false); |
111 | 111 | } |
112 | | - |
| 112 | + |
113 | 113 | /** Get connection for writing stuff, i.e. on the master */ |
114 | 114 | protected Connection getWriteConnection(String dbname) throws IOException{ |
115 | 115 | return openConnection(dbname,true,false); |
116 | 116 | } |
117 | | - |
| 117 | + |
118 | 118 | /** Get administrators connection for creating tables/db, etc.. (on master) */ |
119 | 119 | protected Connection getAdminConnection(String dbname) throws IOException { |
120 | 120 | return openConnection(dbname,true,true); |
121 | 121 | } |
122 | | - |
| 122 | + |
123 | 123 | /** Open connection on the master, or load-balanced on one of the slaves */ |
124 | 124 | protected Connection openConnection(String dbname, boolean onMaster, boolean admin) throws IOException { |
125 | 125 | String host=null; |
— | — | @@ -153,26 +153,26 @@ |
154 | 154 | public String quote(String str){ |
155 | 155 | return "'"+str+"'"; |
156 | 156 | } |
157 | | - |
| 157 | + |
158 | 158 | public String escape(String str){ |
159 | 159 | return str.replace("\\","\\\\").replace("'","\\'"); |
160 | 160 | } |
161 | | - |
| 161 | + |
162 | 162 | public String getTableName(String name, String dbname){ |
163 | 163 | if(!separate) |
164 | 164 | return dbname+"_"+name; |
165 | 165 | else |
166 | 166 | return name; |
167 | 167 | } |
168 | | - |
169 | | - // inherit javadoc |
170 | | - public Collection<CompactArticleLinks> getPageReferences(Collection<Title> titles, String dbname) throws IOException { |
| 168 | + |
| 169 | + // inherit javadoc |
| 170 | + public Collection<CompactArticleLinks> getPageReferences(Collection<Title> titles, String dbname) throws IOException { |
171 | 171 | String sql = "SELECT page_key, page_references from "+getTableName("page",dbname)+" WHERE "; |
172 | 172 | if(titles == null || titles.size()==0) |
173 | 173 | return new ArrayList<CompactArticleLinks>(); |
174 | 174 | else if(titles.size()==1){ |
175 | 175 | sql += "page_key="+quote(escape(titles.iterator().next().getKey())); |
176 | | - } else{ |
| 176 | + } else{ |
177 | 177 | StringBuilder sb = new StringBuilder(sql); |
178 | 178 | sb.append("page_key IN ("); |
179 | 179 | Iterator<Title> it = titles.iterator(); |
— | — | @@ -193,7 +193,7 @@ |
194 | 194 | ResultSet res = stmt.executeQuery(sql); |
195 | 195 | ArrayList<CompactArticleLinks> ret = new ArrayList<CompactArticleLinks>(); |
196 | 196 | while(res.next()){ |
197 | | - ret.add(new CompactArticleLinks(res.getString("page_key"),res.getInt("page_references"))); |
| 197 | + ret.add(new CompactArticleLinks(res.getString("page_key"),res.getInt("page_references"))); |
198 | 198 | } |
199 | 199 | conn.close(); |
200 | 200 | return ret; |
— | — | @@ -205,12 +205,12 @@ |
206 | 206 | |
207 | 207 | // inherit javadoc |
208 | 208 | public void storePageReferences(Collection<CompactArticleLinks> refs, String dbname) throws IOException { |
209 | | - final int maxPerQuery = 10000; |
| 209 | + final int maxPerQuery = 10000; |
210 | 210 | Connection conn = getWriteConnection(dbname); |
211 | | - verifyTable("page",dbname,conn); |
| 211 | + verifyTable("page",dbname,conn); |
212 | 212 | Iterator<CompactArticleLinks> it = refs.iterator(); |
213 | | - // send chunks of maxPerQuery reference replacements |
214 | | - while(it.hasNext()){ |
| 213 | + // send chunks of maxPerQuery reference replacements |
| 214 | + while(it.hasNext()){ |
215 | 215 | StringBuilder sb = new StringBuilder("INSERT INTO "+getTableName("page",dbname)+" (page_key,page_references) VALUES "); |
216 | 216 | int count = 0; |
217 | 217 | while(it.hasNext() && count < maxPerQuery){ |
— | — | @@ -228,19 +228,19 @@ |
229 | 229 | try { |
230 | 230 | log.info("Storing "+Math.min(maxPerQuery,count)+" page ranks... "); |
231 | 231 | Statement stmt = conn.createStatement(); |
232 | | - stmt.executeUpdate(sb.toString()); |
233 | | - |
| 232 | + stmt.executeUpdate(sb.toString()); |
| 233 | + |
234 | 234 | } catch (SQLException e) { |
235 | 235 | log.error("Cannot execute replace query "+sb+" : "+e.getMessage()); |
236 | 236 | throw new IOException(e.getMessage()); |
237 | | - } |
| 237 | + } |
238 | 238 | } |
239 | 239 | try { |
240 | 240 | conn.close(); // be sure we close the connection |
241 | 241 | } catch (SQLException e) { |
242 | 242 | } |
243 | 243 | } |
244 | | - |
| 244 | + |
245 | 245 | /** Creates table if it doesn't exist */ |
246 | 246 | protected void verifyTable(String name, String dbname, Connection conn) throws IOException { |
247 | 247 | // verify if table exists |
— | — | @@ -251,19 +251,19 @@ |
252 | 252 | ResultSet res = stmt.executeQuery("SHOW TABLES LIKE '"+table+"';"); |
253 | 253 | if(res.next()) // table exists! |
254 | 254 | return; |
255 | | - |
| 255 | + |
256 | 256 | } catch (SQLException e) { |
257 | 257 | log.error("Cannot verify table "+table+" : "+e.getMessage()); |
258 | 258 | throw new IOException(e.getMessage()); |
259 | 259 | } |
260 | | - |
| 260 | + |
261 | 261 | // fetch table definition |
262 | 262 | String def = tableDefs.get(name); |
263 | 263 | if(def == null){ |
264 | 264 | if(!lib.endsWith(Configuration.PATH_SEP)) |
265 | 265 | lib = lib+Configuration.PATH_SEP; |
266 | | - |
267 | | - BufferedReader file = new BufferedReader(new FileReader(lib+name+"_table.sql")); |
| 266 | + |
| 267 | + BufferedReader file = new BufferedReader(new FileReader(lib+name+"_table.sql")); |
268 | 268 | StringBuilder sb = new StringBuilder(); |
269 | 269 | String line; |
270 | 270 | while((line = file.readLine()) != null){ |
— | — | @@ -276,7 +276,7 @@ |
277 | 277 | if(!separate) |
278 | 278 | tdef = def.replace("/*DBprefix*/",dbname+"_"); |
279 | 279 | else |
280 | | - tdef = def; |
| 280 | + tdef = def; |
281 | 281 | // create |
282 | 282 | try { |
283 | 283 | Connection admin = getAdminConnection(dbname); |
— | — | @@ -287,7 +287,7 @@ |
288 | 288 | } catch (SQLException e) { |
289 | 289 | log.error("Cannot create table "+table+" : "+e.getMessage()); |
290 | 290 | throw new IOException(e.getMessage()); |
291 | | - } |
| 291 | + } |
292 | 292 | } |
293 | 293 | |
294 | 294 | @Override |
— | — | @@ -299,7 +299,7 @@ |
300 | 300 | return new HashMap<Title, ArrayList<RelatedTitle>>(); |
301 | 301 | else if(titles.size()==1){ |
302 | 302 | sql += "a.page_key="+quote(escape(titles.iterator().next().getKey())); |
303 | | - } else{ |
| 303 | + } else{ |
304 | 304 | StringBuilder sb = new StringBuilder(sql); |
305 | 305 | sb.append("a.page_key IN ("); |
306 | 306 | Iterator<Title> it = titles.iterator(); |
— | — | @@ -328,7 +328,7 @@ |
329 | 329 | rel = new ArrayList<RelatedTitle>(); |
330 | 330 | ret.put(t1,rel); |
331 | 331 | } |
332 | | - rel.add(new RelatedTitle(t2,score)); |
| 332 | + rel.add(new RelatedTitle(t2,score)); |
333 | 333 | } |
334 | 334 | conn.close(); |
335 | 335 | return ret; |
— | — | @@ -337,12 +337,12 @@ |
338 | 338 | throw new IOException(e.getMessage()); |
339 | 339 | } |
340 | 340 | } |
341 | | - |
342 | | - protected HashMap<String,Integer> getPageIDs(Collection<String> keys, String dbname, Connection conn) throws IOException{ |
| 341 | + |
| 342 | + protected HashMap<String,Integer> getPageIDs(Collection<String> keys, String dbname, Connection conn) throws IOException{ |
343 | 343 | String sql = "SELECT page_key, page_id from "+getTableName("page",dbname)+" WHERE "; |
344 | 344 | if(keys.size()==1){ |
345 | 345 | sql += "page_key="+quote(escape(keys.iterator().next())); |
346 | | - } else{ |
| 346 | + } else{ |
347 | 347 | StringBuilder sb = new StringBuilder(sql); |
348 | 348 | sb.append("page_key IN ("); |
349 | 349 | Iterator<String> it = keys.iterator(); |
— | — | @@ -380,12 +380,12 @@ |
381 | 381 | keys.add(r.getRelates().toString()); |
382 | 382 | } |
383 | 383 | HashMap<String,Integer> map = getPageIDs(keys,dbname,read); |
384 | | - final int maxPerQuery = 20000; |
| 384 | + final int maxPerQuery = 20000; |
385 | 385 | Connection write = getWriteConnection(dbname); |
386 | | - verifyTable("related",dbname,write); |
| 386 | + verifyTable("related",dbname,write); |
387 | 387 | Iterator<Related> it = related.iterator(); |
388 | | - // send chunks of maxPerQuery reference replacements |
389 | | - while(it.hasNext()){ |
| 388 | + // send chunks of maxPerQuery reference replacements |
| 389 | + while(it.hasNext()){ |
390 | 390 | StringBuilder sb = new StringBuilder("INSERT INTO "+getTableName("related",dbname)+" (rel_to,rel_related,rel_score) VALUES "); |
391 | 391 | int count = 0; |
392 | 392 | while(it.hasNext() && count < maxPerQuery){ |
— | — | @@ -405,18 +405,18 @@ |
406 | 406 | try { |
407 | 407 | log.info("Storing "+Math.min(maxPerQuery,count)+" related pages... "); |
408 | 408 | Statement stmt = write.createStatement(); |
409 | | - stmt.executeUpdate(sb.toString()); |
410 | | - |
| 409 | + stmt.executeUpdate(sb.toString()); |
| 410 | + |
411 | 411 | } catch (SQLException e) { |
412 | 412 | log.error("Cannot execute replace query "+sb+" : "+e.getMessage()); |
413 | 413 | throw new IOException(e.getMessage()); |
414 | | - } |
| 414 | + } |
415 | 415 | } |
416 | 416 | try { |
417 | 417 | write.close(); // be sure we close the connection |
418 | 418 | read.close(); |
419 | 419 | } catch (SQLException e) { |
420 | 420 | } |
421 | | - |
422 | | - } |
| 421 | + |
| 422 | + } |
423 | 423 | } |
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/config/GlobalConfiguration.java |
— | — | @@ -41,8 +41,8 @@ |
42 | 42 | /** |
43 | 43 | * Read and parse the global configuration file, is also used |
44 | 44 | * to discover where the indexes are. Global configuration manages |
45 | | - * a pool of global readonly {@link IndexId} instances. |
46 | | - * |
| 45 | + * a pool of global readonly {@link IndexId} instances. |
| 46 | + * |
47 | 47 | * @author rainman |
48 | 48 | * |
49 | 49 | */ |
— | — | @@ -50,7 +50,7 @@ |
51 | 51 | /** The following hashtables are directly read from the config file: */ |
52 | 52 | /** dbname -> hashtable ( role -> hashtable ( param => value) ) */ |
53 | 53 | protected Hashtable<String, Hashtable<String, Hashtable<String, String>>> database; |
54 | | - /** group -> host -> arraylist ( db.role ) */ |
| 54 | + /** group -> host -> arraylist ( db.role ) */ |
55 | 55 | protected Hashtable<Integer,Hashtable<String,ArrayList<String>>> searchGroup; |
56 | 56 | /** host -> arraylist ( db.role ) */ |
57 | 57 | protected Hashtable<String,ArrayList<String>> search; |
— | — | @@ -73,7 +73,7 @@ |
74 | 74 | /** prefixes, e.g. main, talk, help, and corresponding filters */ |
75 | 75 | protected Hashtable<String,NamespaceFilter> namespacePrefix; |
76 | 76 | /** keyword for all namespaces (i.e. no filtering) */ |
77 | | - protected String namespacePrefixAll; |
| 77 | + protected String namespacePrefixAll; |
78 | 78 | /** suffix -> OAI Repo url pattern */ |
79 | 79 | protected Hashtable<String,String> oaiRepo; |
80 | 80 | /** wgLanguageCode from InitialiseSettings, suffix -> lang code */ |
— | — | @@ -92,50 +92,50 @@ |
93 | 93 | protected String commonsWiki = null; |
94 | 94 | /** dbname -> namespace scaling */ |
95 | 95 | protected Hashtable<String,ArticleNamespaceScaling> namespaceBoost = new Hashtable<String,ArticleNamespaceScaling>(); |
96 | | - |
| 96 | + |
97 | 97 | /** info about this host */ |
98 | 98 | protected static InetAddress myHost; |
99 | | - protected static String hostAddr, hostName; |
100 | | - |
| 99 | + protected static String hostAddr, hostName; |
| 100 | + |
101 | 101 | /** Database suffix of dbname, the rest is supposed to be the language, e.g. srwiki => (suffix wiki) => sr */ |
102 | | - protected String[] databaseSuffixes = null; |
| 102 | + protected String[] databaseSuffixes = null; |
103 | 103 | /** Databases ending in suffix will use additional keyword scores */ |
104 | 104 | protected String[] keywordScoringSuffixes = null; |
105 | 105 | /** Databases ending in suffix will have 2 indexes, one with lowercased words, and one with exact case words */ |
106 | 106 | protected String[] exactCaseSuffix = null; |
107 | | - |
| 107 | + |
108 | 108 | /** For scaling scores according to how old the indexed page is */ |
109 | 109 | protected String[] ageScalingStrong = null; |
110 | 110 | protected String[] ageScalingMedium = null; |
111 | 111 | protected String[] ageScalingWeak = null; |
112 | | - |
| 112 | + |
113 | 113 | /** wikis with additional global ranking data */ |
114 | 114 | protected String[] additionalRank = null; |
115 | | - |
| 115 | + |
116 | 116 | protected Properties globalProperties = null; |
117 | | - |
| 117 | + |
118 | 118 | /** All identifiers of all indexes (dbrole -> IndexId) */ |
119 | 119 | protected static Hashtable<String,IndexId> indexIdPool = new Hashtable<String,IndexId>(); |
120 | | - |
| 120 | + |
121 | 121 | protected static GlobalConfiguration instance = null; |
122 | | - |
| 122 | + |
123 | 123 | /** All the lang codes we encountered, used for "smart interwiki" */ |
124 | 124 | protected HashSet<String> smartInterwikiCodes = new HashSet<String>(); |
125 | 125 | protected boolean useSmartInterwiki = false; |
126 | 126 | protected int maxSearchLimit = 1000; |
127 | | - protected int maxSearchOffset = 1000000; |
128 | | - |
| 127 | + protected int maxSearchOffset = 1000000; |
| 128 | + |
129 | 129 | /** Whether to report warnings and info */ |
130 | 130 | protected static boolean verbose = true; |
131 | | - |
| 131 | + |
132 | 132 | /** Sections in lsearch-config.conf */ |
133 | 133 | protected static enum Section { DATABASE, INDEX, SEARCH, INDEXPATH, NAMESPACE_PREFIX, OAI, DATABASE_GROUP, NAMESPACE_BOOST }; |
134 | 134 | |
135 | 135 | |
136 | 136 | /** |
137 | | - * Use this function to override the hosts IP address which |
138 | | - * is determined automatically when first instance is made |
139 | | - * |
| 137 | + * Use this function to override the hosts IP address which |
| 138 | + * is determined automatically when first instance is made |
| 139 | + * |
140 | 140 | * @param host IP address |
141 | 141 | */ |
142 | 142 | public static void setHost(InetAddress host){ |
— | — | @@ -143,15 +143,15 @@ |
144 | 144 | hostAddr = myHost.getHostAddress(); |
145 | 145 | hostName = myHost.getHostName(); |
146 | 146 | } |
147 | | - |
| 147 | + |
148 | 148 | protected GlobalConfiguration(){ |
149 | 149 | // try to determine this host's IP address |
150 | 150 | determineInetAddress(); |
151 | | - } |
152 | | - |
| 151 | + } |
| 152 | + |
153 | 153 | /** |
154 | 154 | * Get singleton instance of this class |
155 | | - * |
| 155 | + * |
156 | 156 | * @return |
157 | 157 | */ |
158 | 158 | synchronized public static GlobalConfiguration getInstance() { |
— | — | @@ -159,12 +159,12 @@ |
160 | 160 | instance = new GlobalConfiguration(); |
161 | 161 | return instance; |
162 | 162 | } |
163 | | - |
| 163 | + |
164 | 164 | /** |
165 | | - * Try to determine the (non-loopback) IP address of this |
166 | | - * computer. Will work only if there is one attached |
| 165 | + * Try to determine the (non-loopback) IP address of this |
| 166 | + * computer. Will work only if there is one attached |
167 | 167 | * network interface, otherwise the {@link #setHost(InetAddress)} method |
168 | | - * needs to be used |
| 168 | + * needs to be used |
169 | 169 | */ |
170 | 170 | protected void determineInetAddress() { |
171 | 171 | try { |
— | — | @@ -172,27 +172,27 @@ |
173 | 173 | } catch (UnknownHostException e1) { |
174 | 174 | System.out.println("Error resolving local hostname. Make sure that hostname is setup correctly."); |
175 | 175 | e1.printStackTrace(); |
176 | | - } |
| 176 | + } |
177 | 177 | } |
178 | | - |
| 178 | + |
179 | 179 | /** Return true if host is the current host (IP or hostname) */ |
180 | 180 | public boolean isLocalhost(String host){ |
181 | 181 | if(host == null) |
182 | 182 | return false; |
183 | 183 | return host.equalsIgnoreCase(hostAddr) || host.equalsIgnoreCase(hostName); |
184 | 184 | } |
185 | | - |
| 185 | + |
186 | 186 | /** Secure add-to-list, check if the index definition exists, and avoid duplicates in list */ |
187 | 187 | protected void addToList(ArrayList<String> list, String str){ |
188 | 188 | if(!list.contains(str)){ |
189 | 189 | String[] parts = str.split("\\."); |
190 | | - if(database.containsKey(parts[0]) && |
| 190 | + if(database.containsKey(parts[0]) && |
191 | 191 | (parts.length==1 || (parts.length==2 && database.get(parts[0]).containsKey(parts[1])))){ |
192 | 192 | list.add(str); |
193 | 193 | } |
194 | 194 | } |
195 | 195 | } |
196 | | - |
| 196 | + |
197 | 197 | @SuppressWarnings("unchecked") |
198 | 198 | protected void checkSubdivisions(String dbname, String type){ |
199 | 199 | Hashtable<String,Hashtable<String,String>> typeParams = database.get(dbname); |
— | — | @@ -204,11 +204,11 @@ |
205 | 205 | typeParams.put(type+".sub"+i,(Hashtable<String, String>) params.clone()); |
206 | 206 | } |
207 | 207 | } |
208 | | - |
209 | | - /** |
| 208 | + |
| 209 | + /** |
210 | 210 | * Check if the setup is correct, i.e. there is an indexer and a searcher |
211 | 211 | * for each db ... |
212 | | - * |
| 212 | + * |
213 | 213 | * @return returns true if the setup is OK |
214 | 214 | */ |
215 | 215 | public boolean checkIntegrity(){ |
— | — | @@ -260,7 +260,7 @@ |
261 | 261 | // expand logical index names on searchers |
262 | 262 | for(String host : search.keySet()){ |
263 | 263 | ArrayList<String> hostsearch = search.get(host); |
264 | | - for(String dbname : hostsearch.toArray(new String[]{})){ |
| 264 | + for(String dbname : hostsearch.toArray(new String[]{})){ |
265 | 265 | Hashtable<String, Hashtable<String,String>> types = database.get(dbname); |
266 | 266 | if(types != null){ // if not null, dbrole is dbname |
267 | 267 | if(types.containsKey("mainsplit")){ |
— | — | @@ -298,7 +298,7 @@ |
299 | 299 | } else if(typeid.matches("nspart[1-9][0-9]*")){ |
300 | 300 | type = "nssplit"; |
301 | 301 | dbrole = dbname + "." + typeid; |
302 | | - } else if(typeid.equals("spell") || typeid.equals("links") || typeid.equals("related") |
| 302 | + } else if(typeid.equals("spell") || typeid.equals("links") || typeid.equals("related") |
303 | 303 | || typeid.equals("prefix") || typeid.equals("title_ngram")){ |
304 | 304 | type = typeid; |
305 | 305 | dbrole = dbname + "." + typeid; |
— | — | @@ -307,7 +307,7 @@ |
308 | 308 | dbrole = dbname + "." + typeid; |
309 | 309 | } else |
310 | 310 | continue; // uknown type, skip |
311 | | - |
| 311 | + |
312 | 312 | if(indexLocation.get(dbrole) == null){ |
313 | 313 | // fill-in with host that indexes dbname |
314 | 314 | if(indexLocation.get(dbname) != null ){ |
— | — | @@ -319,13 +319,13 @@ |
320 | 320 | return false; |
321 | 321 | } */ |
322 | 322 | // add same index location for highlight .hl index |
323 | | - /* String host = indexLocation.get(dbrole); |
| 323 | + /* String host = indexLocation.get(dbrole); |
324 | 324 | indexLocation.put(dbrole+".hl",host); |
325 | 325 | index.get(host).add(dbrole+".hl"); */ |
326 | | - |
| 326 | + |
327 | 327 | } |
328 | | - /* boolean searched = (getSearchHosts(dbrole).size() != 0); |
329 | | - if(!searched && !(typeid.equals("mainsplit") || typeid.equals("split") |
| 328 | + /* boolean searched = (getSearchHosts(dbrole).size() != 0); |
| 329 | + if(!searched && !(typeid.equals("mainsplit") || typeid.equals("split") |
330 | 330 | || typeid.equals("nssplit") || typeid.equals("links") || typeid.equals("related") || typeid.equals("title_ngram"))){ |
331 | 331 | if(verbose) |
332 | 332 | System.out.println("WARNING: in Global Configuration: index "+dbrole+" is not searched by any host."); |
— | — | @@ -334,10 +334,10 @@ |
335 | 335 | } |
336 | 336 | return true; |
337 | 337 | } |
338 | | - |
339 | | - /** |
| 338 | + |
| 339 | + /** |
340 | 340 | * Read a config file from a given URL |
341 | | - * |
| 341 | + * |
342 | 342 | * @param url |
343 | 343 | * @throws IOException |
344 | 344 | */ |
— | — | @@ -351,15 +351,15 @@ |
352 | 352 | } catch (IOException e) { |
353 | 353 | System.out.println("I/O Error in opening or reading global config at url "+url); |
354 | 354 | throw e; |
355 | | - } |
| 355 | + } |
356 | 356 | } |
357 | 357 | |
358 | 358 | /** |
359 | 359 | * Prepare hashtables to load data into them |
360 | | - * |
| 360 | + * |
361 | 361 | */ |
362 | 362 | protected void init(){ |
363 | | - database = new Hashtable<String, Hashtable<String, Hashtable<String, String>>>(); |
| 363 | + database = new Hashtable<String, Hashtable<String, Hashtable<String, String>>>(); |
364 | 364 | searchGroup = new Hashtable<Integer,Hashtable<String, ArrayList<String>>>(); |
365 | 365 | search = new Hashtable<String, ArrayList<String>>(); |
366 | 366 | searchWildcard = new Hashtable<String, ArrayList<Pattern>>(); |
— | — | @@ -373,46 +373,46 @@ |
374 | 374 | oaiRepo = new Hashtable<String,String>(); |
375 | 375 | indexWildcard = new Hashtable<Pattern,String>(); |
376 | 376 | } |
377 | | - |
| 377 | + |
378 | 378 | protected String[] getArrayProperty(String name){ |
379 | 379 | String s = globalProperties.getProperty(name); |
380 | 380 | if (s != null) |
381 | 381 | return s.split(" "); |
382 | 382 | return null; |
383 | 383 | } |
384 | | - |
385 | | - /** |
| 384 | + |
| 385 | + /** |
386 | 386 | * Reads a config file from a bufferedreader, will |
387 | 387 | * close the reader when done. |
388 | | - * |
| 388 | + * |
389 | 389 | * @param in opened reader |
390 | 390 | * @throws IOException |
391 | 391 | */ |
392 | 392 | protected void read(BufferedReader in, String indexpath) throws IOException{ |
393 | | - String line=""; |
394 | | - Section section = null; |
| 393 | + String line=""; |
| 394 | + Section section = null; |
395 | 395 | Pattern roleRegexp = Pattern.compile("\\((.*?)\\)"); |
396 | 396 | int lineNum = 0; |
397 | | - // sections |
| 397 | + // sections |
398 | 398 | int searchGroupNum = -1; |
399 | | - |
| 399 | + |
400 | 400 | init(); |
401 | 401 | this.indexPath = indexpath; |
402 | | - |
| 402 | + |
403 | 403 | while((line = in.readLine()) != null){ |
404 | 404 | lineNum ++; |
405 | 405 | // strip comments |
406 | 406 | line = line.replaceFirst("#.*",""); |
407 | | - |
| 407 | + |
408 | 408 | if(line.trim().equals("")) |
409 | | - continue; |
410 | | - |
| 409 | + continue; |
| 410 | + |
411 | 411 | line = preprocessLine(line); |
412 | | - |
| 412 | + |
413 | 413 | if(line.startsWith("[") && line.length()>2 && !Character.isDigit(line.charAt(1))){ // section |
414 | 414 | int last = line.indexOf("]"); |
415 | 415 | String s = line.substring(1,last); |
416 | | - |
| 416 | + |
417 | 417 | if(s.equalsIgnoreCase("properties")){ |
418 | 418 | globalProperties = new Properties(); |
419 | 419 | StringBuilder prop = new StringBuilder(line+"\n"); |
— | — | @@ -421,7 +421,7 @@ |
422 | 422 | break; |
423 | 423 | prop.append(line); |
424 | 424 | prop.append("\n"); |
425 | | - } |
| 425 | + } |
426 | 426 | globalProperties.load(new ByteArrayInputStream(prop.toString().getBytes("utf-8"))); |
427 | 427 | // get some predefined global properties |
428 | 428 | this.databaseSuffixes = getArrayProperty("Database.suffix"); |
— | — | @@ -445,7 +445,7 @@ |
446 | 446 | last = line.indexOf("]"); |
447 | 447 | s = line.substring(1,last); |
448 | 448 | } |
449 | | - |
| 449 | + |
450 | 450 | if(s.equalsIgnoreCase("database")) |
451 | 451 | section = Section.DATABASE; |
452 | 452 | else if(s.equalsIgnoreCase("index")) |
— | — | @@ -468,33 +468,33 @@ |
469 | 469 | System.out.println("Ignoring a line up to first section heading..."); |
470 | 470 | } else if(section == Section.DATABASE || section == Section.DATABASE_GROUP){ |
471 | 471 | String[] parts = splitBySemicolon(line,lineNum); |
472 | | - if(parts == null) continue; |
| 472 | + if(parts == null) continue; |
473 | 473 | String[] dbs = parts[0].split(","); |
474 | 474 | for(int i=0;i<dbs.length;i++) dbs[i]=dbs[i].trim(); |
475 | | - |
476 | | - // syntax: dbname : (role,params), (role2,params2) |
| 475 | + |
| 476 | + // syntax: dbname : (role,params), (role2,params2) |
477 | 477 | Matcher matcher = roleRegexp.matcher(parts[1]); |
478 | 478 | while(matcher.find()){ |
479 | | - processDBRole(dbs,matcher.group(1)); |
| 479 | + processDBRole(dbs,matcher.group(1)); |
480 | 480 | } |
481 | 481 | } else if(section == Section.SEARCH){ |
482 | 482 | String[] parts = splitBySemicolon(line,lineNum); |
483 | 483 | if(parts == null) continue; |
484 | 484 | String host = parts[0].trim(); |
485 | | - |
| 485 | + |
486 | 486 | processSearchRoles( host, parts[1], searchGroupNum); |
487 | 487 | } else if(section == Section.INDEX){ |
488 | 488 | String[] parts = splitBySemicolon(line,lineNum); |
489 | 489 | if(parts == null) continue; |
490 | 490 | String host = parts[0].trim(); |
491 | | - |
| 491 | + |
492 | 492 | processIndexRoles(host,parts[1]); |
493 | 493 | } else if(section == Section.INDEXPATH){ |
494 | 494 | String[] parts = splitBySemicolon(line,lineNum); |
495 | 495 | if(parts == null) continue; |
496 | 496 | String host = parts[0].trim(); |
497 | 497 | String path = parts[1].trim(); |
498 | | - |
| 498 | + |
499 | 499 | if(indexRsyncPath.get(host)!=null && verbose) |
500 | 500 | System.out.println("Warning: repeated path definition for host "+host+" on line "+lineNum+", overwriting old."); |
501 | 501 | indexRsyncPath.put(host,path); |
— | — | @@ -502,24 +502,24 @@ |
503 | 503 | String[] parts = splitBySemicolon(line,lineNum); |
504 | 504 | if(parts == null) continue; |
505 | 505 | String prefix = parts[0].trim(); |
506 | | - String filter = parts[1].trim(); |
507 | | - |
| 506 | + String filter = parts[1].trim(); |
| 507 | + |
508 | 508 | if(filter.equalsIgnoreCase("<all>")) |
509 | 509 | namespacePrefixAll = prefix; |
510 | 510 | else |
511 | | - namespacePrefix.put(prefix,new NamespaceFilter(filter)); |
| 511 | + namespacePrefix.put(prefix,new NamespaceFilter(filter)); |
512 | 512 | } else if(section == Section.OAI){ |
513 | 513 | String[] parts = splitBySemicolon(line,lineNum); |
514 | 514 | if(parts == null) continue; |
515 | 515 | String suffix = parts[0].trim(); |
516 | | - String url = parts[1].trim(); |
517 | | - |
| 516 | + String url = parts[1].trim(); |
| 517 | + |
518 | 518 | oaiRepo.put(suffix,url); |
519 | 519 | } else if(section == Section.NAMESPACE_BOOST){ |
520 | 520 | String[] parts = splitBySemicolon(line,lineNum); |
521 | | - if(parts == null) continue; |
| 521 | + if(parts == null) continue; |
522 | 522 | String[] dbs = parts[0].split(","); |
523 | | - for(int i=0;i<dbs.length;i++) |
| 523 | + for(int i=0;i<dbs.length;i++) |
524 | 524 | dbs[i]=dbs[i].trim(); |
525 | 525 | processNamespaceBoost(dbs,parts[1]); |
526 | 526 | } |
— | — | @@ -528,7 +528,7 @@ |
529 | 529 | in.close(); |
530 | 530 | System.exit(1); |
531 | 531 | } |
532 | | - |
| 532 | + |
533 | 533 | makeIndexIdPool(); |
534 | 534 | if(useSmartInterwiki){ |
535 | 535 | for(IndexId iid : indexIdPool.values()){ |
— | — | @@ -538,17 +538,17 @@ |
539 | 539 | for(String suffix : databaseSuffixes) { |
540 | 540 | if(dbname.endsWith(suffix)) |
541 | 541 | smartInterwikiCodes.add(dbname.substring(0, dbname.length() - suffix.length()).toLowerCase()); |
542 | | - } |
| 542 | + } |
543 | 543 | } |
544 | 544 | } |
545 | 545 | } |
546 | 546 | in.close(); |
547 | 547 | } |
548 | | - |
| 548 | + |
549 | 549 | protected void processNamespaceBoost(String[] dbs, String def) { |
550 | 550 | Pattern mapRegexp = Pattern.compile("\\((.*?)\\)"); |
551 | 551 | HashMap<Integer,Float> map = new HashMap<Integer,Float>(); |
552 | | - |
| 552 | + |
553 | 553 | Matcher matcher = mapRegexp.matcher(def); |
554 | 554 | while(matcher.find()){ |
555 | 555 | String[] parts = matcher.group(1).split(","); |
— | — | @@ -565,7 +565,7 @@ |
566 | 566 | * A bit hackish: read InitialiseSettings which we know have a certain |
567 | 567 | * format to avoid maintaining two copies for config files (one in php |
568 | 568 | * other for lsearch in global conf) |
569 | | - * |
| 569 | + * |
570 | 570 | * @param initset |
571 | 571 | */ |
572 | 572 | protected void initializeWmfSettings(String initset) { |
— | — | @@ -580,7 +580,7 @@ |
581 | 581 | Localization.readDBLocalizations(text); |
582 | 582 | } catch (Exception e) { |
583 | 583 | System.out.println("Error reading InitialiseSettings.php from url "+initset+" : "+e.getMessage()); |
584 | | - } |
| 584 | + } |
585 | 585 | } |
586 | 586 | |
587 | 587 | /** Get all hosts which search this indexId (dbrole) */ |
— | — | @@ -608,7 +608,7 @@ |
609 | 609 | } |
610 | 610 | return searchHosts; |
611 | 611 | } |
612 | | - |
| 612 | + |
613 | 613 | /** Get all hosts that search this dbname within current hosts search groups */ |
614 | 614 | protected HashSet<String> getMySearchHosts(String dbname, String dbrole){ |
615 | 615 | HashSet<String> searchHosts = new HashSet<String>(); |
— | — | @@ -617,16 +617,16 @@ |
618 | 618 | group = databaseHostGroup.get(dbname+"@"+hostName); |
619 | 619 | if(group == null) |
620 | 620 | return searchHosts; |
621 | | - |
| 621 | + |
622 | 622 | for(String host : searchGroup.get(group).keySet()){ |
623 | 623 | if(search.get(host).contains(dbrole)) |
624 | 624 | searchHosts.add(host); |
625 | 625 | } |
626 | 626 | return searchHosts; |
627 | 627 | } |
628 | | - |
| 628 | + |
629 | 629 | /** |
630 | | - * Call after all data is read from config file, make indexIds for all the |
| 630 | + * Call after all data is read from config file, make indexIds for all the |
631 | 631 | * indexes in the system |
632 | 632 | * |
633 | 633 | */ |
— | — | @@ -637,7 +637,7 @@ |
638 | 638 | localIndexes = new ArrayList<String>(); |
639 | 639 | if(index.get(hostName)!=null) |
640 | 640 | localIndexes.addAll(index.get(hostName)); */ |
641 | | - |
| 641 | + |
642 | 642 | // dbname -> ts index, e.g. enwiki -> en-titles.tspart1 |
643 | 643 | HashMap<String,String> dbnameTitlesPart = new HashMap<String,String>(); |
644 | 644 | // dbname -> matched suffix, e.g. enwiki -> wiki |
— | — | @@ -670,7 +670,7 @@ |
671 | 671 | // process the general pattern |
672 | 672 | if(database.containsKey("<all>")){ |
673 | 673 | Hashtable<String,Hashtable<String,String>> allKeyword = database.get("<all>"); |
674 | | - splitFactor = Integer.parseInt(allKeyword.get("titles_by_suffix").get("number")); |
| 674 | + splitFactor = Integer.parseInt(allKeyword.get("titles_by_suffix").get("number")); |
675 | 675 | for(int i=1;i<=splitFactor;i++){ |
676 | 676 | String part = "tspart"+i; |
677 | 677 | suffixIwMap.putAll(allKeyword.get(part)); |
— | — | @@ -705,17 +705,17 @@ |
706 | 706 | code.put("code",dbLang.get(db)); |
707 | 707 | p.put("language",code); |
708 | 708 | } |
709 | | - database.put(db+"-titles",p); |
| 709 | + database.put(db+"-titles",p); |
710 | 710 | } |
711 | 711 | } |
712 | | - |
| 712 | + |
713 | 713 | // iterate over all dbs and types |
714 | 714 | for(String dbname : database.keySet()){ |
715 | 715 | if(dbname.startsWith("<")) |
716 | 716 | continue; // keyword special case |
717 | 717 | for(String typeid : database.get(dbname).keySet()){ |
718 | 718 | String type = ""; |
719 | | - String dbrole = ""; |
| 719 | + String dbrole = ""; |
720 | 720 | if(typeid.equals("single") || typeid.equals("mainsplit") || typeid.equals("split") || typeid.equals("nssplit")){ |
721 | 721 | type = typeid; |
722 | 722 | dbrole = dbname; |
— | — | @@ -742,20 +742,20 @@ |
743 | 743 | dbrole = dbname + "." + typeid; |
744 | 744 | } else |
745 | 745 | continue; // unknown type, skip |
746 | | - |
| 746 | + |
747 | 747 | HashSet<String> searchHosts = getSearchHosts(dbrole); |
748 | 748 | // FIXME: grouping is largely broken and not really that useful |
749 | 749 | // HashSet<String> mySearchHosts = getMySearchHosts(dbname,dbrole); |
750 | 750 | HashSet<String> mySearchHosts = searchHosts; |
751 | 751 | boolean mySearch = searchHosts.contains(hostAddr) || searchHosts.contains(hostName); |
752 | 752 | String indexHost = getIndexHost(dbrole,dbname); |
753 | | - boolean myIndex = isMyHost(indexHost); |
| 753 | + boolean myIndex = isMyHost(indexHost); |
754 | 754 | Hashtable<String,String> typeidParams = database.get(dbname).get(typeid); |
755 | 755 | boolean isSubdivided = false; |
756 | 756 | if(typeidParams != null && typeidParams.containsKey("subdivisions")){ |
757 | 757 | isSubdivided = true; |
758 | 758 | } |
759 | | - |
| 759 | + |
760 | 760 | String rsyncIndexPath = "/"; |
761 | 761 | // if the index is on the local computer search for it |
762 | 762 | // using both ip address and host name |
— | — | @@ -764,14 +764,14 @@ |
765 | 765 | if(rsyncIndexPath == null) |
766 | 766 | rsyncIndexPath = indexRsyncPath.get(hostName); |
767 | 767 | if(rsyncIndexPath == null) |
768 | | - rsyncIndexPath = indexRsyncPath.get("<default>"); |
| 768 | + rsyncIndexPath = indexRsyncPath.get("<default>"); |
769 | 769 | } else{ |
770 | 770 | rsyncIndexPath = indexRsyncPath.get(indexHost); |
771 | 771 | if(rsyncIndexPath == null) |
772 | 772 | rsyncIndexPath = indexRsyncPath.get("<default>"); |
773 | 773 | } |
774 | 774 | String oairepo = getOAIRepo(dbname); |
775 | | - |
| 775 | + |
776 | 776 | IndexId iid = new IndexId(dbrole, |
777 | 777 | type, |
778 | 778 | indexHost, |
— | — | @@ -789,8 +789,8 @@ |
790 | 790 | dbnameSuffix.get(dbname), |
791 | 791 | getSuffixToDBMap(dbrole,dbnameTitlesPart,dbnameSuffix)); |
792 | 792 | indexIdPool.put(dbrole,iid); |
793 | | - |
794 | | - // add precursor indexes |
| 793 | + |
| 794 | + // add precursor indexes |
795 | 795 | if(type.equals("spell") || type.equals("prefix")){ |
796 | 796 | iid = new IndexId(dbrole+".pre", |
797 | 797 | "precursor", |
— | — | @@ -811,7 +811,7 @@ |
812 | 812 | indexIdPool.put(dbrole+".pre",iid); |
813 | 813 | } |
814 | 814 | // add highlight indexes |
815 | | - if(type.equals("single") || type.equals("mainsplit") || type.equals("split") |
| 815 | + if(type.equals("single") || type.equals("mainsplit") || type.equals("split") |
816 | 816 | || type.equals("nssplit") || type.equals("subdivided")){ |
817 | 817 | dbrole+=".hl"; |
818 | 818 | searchHosts = getSearchHosts(dbrole); |
— | — | @@ -837,15 +837,15 @@ |
838 | 838 | getSuffixToDBMap(dbrole,dbnameTitlesPart,dbnameSuffix)); |
839 | 839 | indexIdPool.put(dbrole,iid); |
840 | 840 | } |
841 | | - |
| 841 | + |
842 | 842 | } |
843 | 843 | if(indexIdPool.get(dbname).isNssplit()){ |
844 | 844 | indexIdPool.get(dbname).rebuildNsMap(indexIdPool); |
845 | 845 | indexIdPool.get(dbname+".hl").rebuildNsMap(indexIdPool); |
846 | 846 | } |
847 | | - } |
| 847 | + } |
848 | 848 | } |
849 | | - |
| 849 | + |
850 | 850 | private String getIndexHost(String dbrole, String dbname){ |
851 | 851 | String indexHost = indexLocation.get(dbrole); |
852 | 852 | if(indexHost == null) |
— | — | @@ -880,7 +880,7 @@ |
881 | 881 | return map; |
882 | 882 | } |
883 | 883 | |
884 | | - /** |
| 884 | + /** |
885 | 885 | * Return the IndexId object given its id string representation |
886 | 886 | * @param dbrole |
887 | 887 | * @return |
— | — | @@ -888,11 +888,11 @@ |
889 | 889 | static public IndexId getIndexId(String dbrole){ |
890 | 890 | return indexIdPool.get(dbrole); |
891 | 891 | } |
892 | | - |
| 892 | + |
893 | 893 | /** |
894 | 894 | * Substitute {url} with content of url |
895 | 895 | * where url points to list of db's one in each line |
896 | | - * |
| 896 | + * |
897 | 897 | * @param line |
898 | 898 | * @return processed line |
899 | 899 | */ |
— | — | @@ -918,9 +918,9 @@ |
919 | 919 | text.append(l); |
920 | 920 | } |
921 | 921 | in.close(); |
922 | | - |
| 922 | + |
923 | 923 | String dbs = text.toString(); |
924 | | - |
| 924 | + |
925 | 925 | matcher.appendReplacement(replaced,dbs); |
926 | 926 | } catch (MalformedURLException e) { |
927 | 927 | System.out.println("Error in global config file: URL "+matcher.group(1)+" is a malfomed URL"); |
— | — | @@ -929,7 +929,7 @@ |
930 | 930 | } |
931 | 931 | } |
932 | 932 | matcher.appendTail(replaced); |
933 | | - |
| 933 | + |
934 | 934 | return replaced.toString(); |
935 | 935 | |
936 | 936 | } |
— | — | @@ -942,7 +942,7 @@ |
943 | 943 | } |
944 | 944 | return parts; |
945 | 945 | } |
946 | | - |
| 946 | + |
947 | 947 | /** |
948 | 948 | * Process a list: db.role, db1.role2... put this list into |
949 | 949 | * index Hashtable, and make a reverse Hashtable indexLocation |
— | — | @@ -951,13 +951,13 @@ |
952 | 952 | */ |
953 | 953 | protected void processIndexRoles(String host, String roles){ |
954 | 954 | String[] dbroles = roles.split("[, ]+"); |
955 | | - |
| 955 | + |
956 | 956 | ArrayList<String> hostroles = index.get(host); |
957 | 957 | if(hostroles == null){ |
958 | 958 | hostroles = new ArrayList<String>(); |
959 | 959 | index.put(host,hostroles); |
960 | 960 | } |
961 | | - for(String dbrole : dbroles){ |
| 961 | + for(String dbrole : dbroles){ |
962 | 962 | dbrole = dbrole.trim(); |
963 | 963 | if(dbrole.length()==0) |
964 | 964 | continue; |
— | — | @@ -970,12 +970,12 @@ |
971 | 971 | if(dbrole.contains("*")) |
972 | 972 | indexWildcard.put(StringUtils.makeRegexp(dbrole),host); |
973 | 973 | else |
974 | | - indexLocation.put(dbrole,host); |
| 974 | + indexLocation.put(dbrole,host); |
975 | 975 | } |
976 | 976 | } |
977 | | - |
| 977 | + |
978 | 978 | /** |
979 | | - * Process a list: db.role, db1.role2... and put them into |
| 979 | + * Process a list: db.role, db1.role2... and put them into |
980 | 980 | * search and searchGroup hashtables |
981 | 981 | * @param host |
982 | 982 | * @param roles |
— | — | @@ -983,19 +983,19 @@ |
984 | 984 | */ |
985 | 985 | protected void processSearchRoles(String host, String roles, int groupNum){ |
986 | 986 | String[] dbroles = roles.split("[, ]+"); |
987 | | - |
| 987 | + |
988 | 988 | // the cumulative search hashtable |
989 | 989 | ArrayList<String> hostroles = search.get(host); |
990 | 990 | if(hostroles == null){ |
991 | 991 | hostroles = new ArrayList<String>(); |
992 | 992 | search.put(host,hostroles); |
993 | 993 | } |
994 | | - |
995 | | - // wildcards that if match will |
| 994 | + |
| 995 | + // wildcards that if match will |
996 | 996 | ArrayList<Pattern> hostwildcards = searchWildcard.get(host); |
997 | 997 | if(hostwildcards == null) |
998 | 998 | searchWildcard.put(host,hostwildcards = new ArrayList<Pattern>()); |
999 | | - |
| 999 | + |
1000 | 1000 | // process groups |
1001 | 1001 | Integer grp = new Integer(groupNum); |
1002 | 1002 | Hashtable<String,ArrayList<String>> grouphosts = searchGroup.get(grp); |
— | — | @@ -1003,13 +1003,13 @@ |
1004 | 1004 | grouphosts = new Hashtable<String,ArrayList<String>>(); |
1005 | 1005 | searchGroup.put(grp,grouphosts); |
1006 | 1006 | } |
1007 | | - |
| 1007 | + |
1008 | 1008 | ArrayList<String> grouproles = grouphosts.get(host); |
1009 | 1009 | if(grouproles == null){ |
1010 | 1010 | grouproles = new ArrayList<String>(); |
1011 | 1011 | grouphosts.put(host,grouproles); |
1012 | 1012 | } |
1013 | | - |
| 1013 | + |
1014 | 1014 | // add to both lists |
1015 | 1015 | for(String dbrole : dbroles){ |
1016 | 1016 | dbrole = dbrole.trim(); |
— | — | @@ -1033,24 +1033,24 @@ |
1034 | 1034 | grouproles.add(dbrole); |
1035 | 1035 | } |
1036 | 1036 | } |
1037 | | - |
| 1037 | + |
1038 | 1038 | /** |
1039 | 1039 | * Process roles in format type,param1,param2... and put |
1040 | 1040 | * them into the database Hashtable |
1041 | | - * |
| 1041 | + * |
1042 | 1042 | * @param db name of the db to which the role belongs |
1043 | 1043 | * @param role role string in above format |
1044 | 1044 | */ |
1045 | 1045 | protected void processDBRole(String[] dbs, String role){ |
1046 | 1046 | String[] tokens = role.split(","); |
1047 | 1047 | String type = tokens[0].trim().toLowerCase(); |
1048 | | - |
| 1048 | + |
1049 | 1049 | Hashtable<String,Hashtable<String,String>> dbroles = new Hashtable<String,Hashtable<String,String>>(); |
1050 | 1050 | Hashtable<String,String> params = new Hashtable<String,String>(); |
1051 | | - |
| 1051 | + |
1052 | 1052 | if(type.equals("single") || type.equals("mainpart") || |
1053 | | - type.equals("restpart") || type.matches("part[1-9][0-9]*")){ |
1054 | | - |
| 1053 | + type.equals("restpart") || type.matches("part[1-9][0-9]*")){ |
| 1054 | + |
1055 | 1055 | // all params are optional, if absent default will be used |
1056 | 1056 | if(tokens.length>1){ |
1057 | 1057 | String token = tokens[1].trim().toLowerCase(); |
— | — | @@ -1067,12 +1067,12 @@ |
1068 | 1068 | params.put("maxBufDocs", tokens[3]); |
1069 | 1069 | if(tokens.length>4) |
1070 | 1070 | params.put("subdivisions", tokens[4]); |
1071 | | - |
| 1071 | + |
1072 | 1072 | if(tokens.length>5 && verbose) |
1073 | 1073 | System.out.println("Unrecognized database parameters in ("+role+")"); |
1074 | | - |
| 1074 | + |
1075 | 1075 | dbroles.put(type,params); |
1076 | | - |
| 1076 | + |
1077 | 1077 | } else if(type.equals("mainsplit")){ |
1078 | 1078 | // currently no params |
1079 | 1079 | dbroles.put(type,params); |
— | — | @@ -1089,33 +1089,33 @@ |
1090 | 1090 | // language is optional, or maybe it shouldn't be |
1091 | 1091 | if(tokens.length>1) |
1092 | 1092 | params.put("code",tokens[1]); |
1093 | | - |
| 1093 | + |
1094 | 1094 | if(tokens.length>2 && verbose) |
1095 | 1095 | System.out.println("Unrecognized language parameters in ("+role+")"); |
1096 | | - |
| 1096 | + |
1097 | 1097 | dbroles.put(type,params); |
1098 | | - |
| 1098 | + |
1099 | 1099 | } else if(type.startsWith("warmup")){ |
1100 | 1100 | // number of warmup queries |
1101 | 1101 | if(tokens.length>1) |
1102 | 1102 | params.put("count",tokens[1]); |
1103 | | - |
| 1103 | + |
1104 | 1104 | if(tokens.length>2 && verbose) |
1105 | 1105 | System.out.println("Unrecognized warmup parameters in ("+role+")"); |
1106 | | - |
| 1106 | + |
1107 | 1107 | dbroles.put(type,params); |
1108 | | - |
| 1108 | + |
1109 | 1109 | } else if(type.matches("nspart[1-9][0-9]*")){ |
1110 | 1110 | // [0,1,2] syntax gets split up in first split, retokenize |
1111 | 1111 | String ns = role.substring(role.indexOf(",")+1,role.lastIndexOf("]")+1).trim(); |
1112 | 1112 | tokens = role.substring(role.lastIndexOf("]")+1).split(","); |
1113 | | - // definition of namespaces, e.g. [0,1,2] |
| 1113 | + // definition of namespaces, e.g. [0,1,2] |
1114 | 1114 | if(ns.length() > 2 && ns.startsWith("[") && ns.endsWith("]")) |
1115 | 1115 | ns = ns.substring(1,ns.length()-1); |
1116 | 1116 | else |
1117 | 1117 | ns = "<default>"; |
1118 | 1118 | params.put("namespaces",ns); |
1119 | | - |
| 1119 | + |
1120 | 1120 | // all params are optional, if absent default will be used |
1121 | 1121 | if(tokens.length>1){ |
1122 | 1122 | String token = tokens[1].trim().toLowerCase(); |
— | — | @@ -1132,35 +1132,35 @@ |
1133 | 1133 | params.put("maxBufDocs", tokens[3]); |
1134 | 1134 | if(tokens.length>4) |
1135 | 1135 | params.put("subdivisions", tokens[4]); |
1136 | | - |
| 1136 | + |
1137 | 1137 | if(tokens.length>5 && verbose) |
1138 | 1138 | System.out.println("Unrecognized database parameters in ("+role+")"); |
1139 | | - |
| 1139 | + |
1140 | 1140 | dbroles.put(type,params); |
1141 | | - |
1142 | | - } else if(type.equals("spell")){ |
| 1141 | + |
| 1142 | + } else if(type.equals("spell")){ |
1143 | 1143 | // all params are optional, if absent default will be used |
1144 | 1144 | if(tokens.length>1) |
1145 | 1145 | params.put("wordsMinFreq",tokens[1]); |
1146 | 1146 | if(tokens.length>2) |
1147 | 1147 | params.put("phrasesMinFreq",tokens[2]); |
1148 | | - |
| 1148 | + |
1149 | 1149 | if(tokens.length>3 && verbose) |
1150 | 1150 | System.out.println("Unrecognized suggest parameters in ("+role+")"); |
1151 | | - |
| 1151 | + |
1152 | 1152 | dbroles.put(type,params); |
1153 | 1153 | } else if(type.equals("title_ngram")){ |
1154 | 1154 | // no params |
1155 | 1155 | if(tokens.length>1 && verbose) |
1156 | 1156 | System.out.println("Unrecognized title_ngram parameters in ("+role+")"); |
1157 | | - |
| 1157 | + |
1158 | 1158 | dbroles.put(type,params); |
1159 | 1159 | } else if(type.equals("prefix")){ |
1160 | 1160 | // no params |
1161 | 1161 | if(tokens.length>1 && verbose) |
1162 | 1162 | System.out.println("Unrecognized prefix parameters in ("+role+")"); |
1163 | | - |
1164 | | - dbroles.put(type,params); |
| 1163 | + |
| 1164 | + dbroles.put(type,params); |
1165 | 1165 | } else if(type.equals("titles_by_suffix") || type.equals("titles_grouped")){ |
1166 | 1166 | if(tokens.length>1) // number of segments |
1167 | 1167 | params.put("number",tokens[1]); |
— | — | @@ -1182,20 +1182,20 @@ |
1183 | 1183 | else |
1184 | 1184 | iw = suffix; |
1185 | 1185 | params.put(suffix,iw); |
1186 | | - } |
1187 | | - dbroles.put(type,params); |
| 1186 | + } |
| 1187 | + dbroles.put(type,params); |
1188 | 1188 | } else if(verbose){ |
1189 | 1189 | System.out.println("Warning: Unrecognized role \""+role+"\".Ignoring."); |
1190 | 1190 | } |
1191 | | - |
1192 | | - |
| 1191 | + |
| 1192 | + |
1193 | 1193 | // add dbroles to all given dbs |
1194 | 1194 | for(int i=0;i<dbs.length;i++){ |
1195 | 1195 | String db = dbs[i]; |
1196 | | - |
| 1196 | + |
1197 | 1197 | Hashtable<String, Hashtable<String, String>> dbr = database.get(db); |
1198 | 1198 | if(dbr == null){ |
1199 | | - dbr = new Hashtable<String, Hashtable<String, String>>(); |
| 1199 | + dbr = new Hashtable<String, Hashtable<String, String>>(); |
1200 | 1200 | database.put(db,dbr); |
1201 | 1201 | } |
1202 | 1202 | if(type.equals("split") || type.equals("mainsplit") || type.equals("single") || type.equals("nssplit")){ |
— | — | @@ -1215,7 +1215,7 @@ |
1216 | 1216 | * Returns if host should do some indexing |
1217 | 1217 | * @return true if this node is indexer |
1218 | 1218 | */ |
1219 | | - public boolean isIndexer(){ |
| 1219 | + public boolean isIndexer(){ |
1220 | 1220 | return index.get(hostAddr)!=null || index.get(hostName)!=null; |
1221 | 1221 | } |
1222 | 1222 | |
— | — | @@ -1226,20 +1226,20 @@ |
1227 | 1227 | public boolean isSearcher() { |
1228 | 1228 | return search.get(hostAddr)!=null || search.get(hostName)!=null; |
1229 | 1229 | } |
1230 | | - |
| 1230 | + |
1231 | 1231 | /** |
1232 | | - * Returns parameters of database, i.e. language, warmup ... |
1233 | | - * |
| 1232 | + * Returns parameters of database, i.e. language, warmup ... |
| 1233 | + * |
1234 | 1234 | * @param dbname |
1235 | 1235 | * @return Hashtable of parameters for dbname.type |
1236 | 1236 | */ |
1237 | 1237 | public Hashtable<String,String> getDBParams(String dbname, String type){ |
1238 | 1238 | return database.get(dbname).get(type); |
1239 | 1239 | } |
1240 | | - |
1241 | | - /** |
1242 | | - * Get integer parameter for dbname.type |
1243 | | - * Returns defaultValue if the param is not defined |
| 1240 | + |
| 1241 | + /** |
| 1242 | + * Get integer parameter for dbname.type |
| 1243 | + * Returns defaultValue if the param is not defined |
1244 | 1244 | */ |
1245 | 1245 | public int getIntDBParam(String dbname, String type, String param, int defaultValue){ |
1246 | 1246 | Hashtable<String,String> p = database.get(dbname).get(type); |
— | — | @@ -1249,10 +1249,10 @@ |
1250 | 1250 | else |
1251 | 1251 | return Integer.parseInt(val); |
1252 | 1252 | } |
1253 | | - |
1254 | | - /** |
1255 | | - * Get string parameter for dbname.type |
1256 | | - * Returns defaultValue if the param is not defined |
| 1253 | + |
| 1254 | + /** |
| 1255 | + * Get string parameter for dbname.type |
| 1256 | + * Returns defaultValue if the param is not defined |
1257 | 1257 | */ |
1258 | 1258 | public String getStringDBParam(String dbname, String type, String param, String defaultValue){ |
1259 | 1259 | Hashtable<String,String> p = database.get(dbname).get(type); |
— | — | @@ -1262,11 +1262,11 @@ |
1263 | 1263 | else |
1264 | 1264 | return val; |
1265 | 1265 | } |
1266 | | - |
| 1266 | + |
1267 | 1267 | /** |
1268 | 1268 | * Look at the logical DB structure ([Database] in global config) |
1269 | | - * and figure out the main index type of database (e.g. single, |
1270 | | - * mainpart, split). |
| 1269 | + * and figure out the main index type of database (e.g. single, |
| 1270 | + * mainpart, split). |
1271 | 1271 | * @param dbname |
1272 | 1272 | * @return lowercased db type |
1273 | 1273 | */ |
— | — | @@ -1276,10 +1276,10 @@ |
1277 | 1277 | String type = (String)e.nextElement(); |
1278 | 1278 | if(type.equals("single") || type.equals("mainsplit") || type.equals("split")) |
1279 | 1279 | return type; |
1280 | | - } |
| 1280 | + } |
1281 | 1281 | // global configuration consistency error |
1282 | 1282 | System.out.println("Database "+dbname+" does not have a specified type (eg single, mainsplit, split)."); |
1283 | | - return "unknown"; |
| 1283 | + return "unknown"; |
1284 | 1284 | } |
1285 | 1285 | |
1286 | 1286 | /** |
— | — | @@ -1289,7 +1289,7 @@ |
1290 | 1290 | public boolean isMyHost(String host) { |
1291 | 1291 | return host.equalsIgnoreCase(hostAddr) || host.equalsIgnoreCase(hostName); |
1292 | 1292 | } |
1293 | | - |
| 1293 | + |
1294 | 1294 | /** Get language for a dbname */ |
1295 | 1295 | public String getLanguage(String dbname) { |
1296 | 1296 | // first check explicit language parameter in global settings |
— | — | @@ -1310,40 +1310,40 @@ |
1311 | 1311 | for (String suffix : databaseSuffixes) { |
1312 | 1312 | if (dbname.endsWith(suffix)) |
1313 | 1313 | return dbname.substring(0, dbname.length() - suffix.length()); |
1314 | | - } |
| 1314 | + } |
1315 | 1315 | } |
1316 | | - |
| 1316 | + |
1317 | 1317 | return ""; |
1318 | 1318 | } |
1319 | | - |
| 1319 | + |
1320 | 1320 | /** All indexes that localhost is indexing */ |
1321 | 1321 | public HashSet<IndexId> getMyIndex(){ |
1322 | 1322 | HashSet<IndexId> ret = new HashSet<IndexId>(); |
1323 | | - |
| 1323 | + |
1324 | 1324 | for(IndexId iid : indexIdPool.values()){ |
1325 | 1325 | if(iid.isMyIndex()) |
1326 | 1326 | ret.add(iid); |
1327 | 1327 | } |
1328 | | - |
| 1328 | + |
1329 | 1329 | return ret; |
1330 | 1330 | } |
1331 | | - |
| 1331 | + |
1332 | 1332 | /** All indexes that localhost is searching */ |
1333 | 1333 | public HashSet<IndexId> getMySearch(){ |
1334 | 1334 | HashSet<IndexId> ret = new HashSet<IndexId>(); |
1335 | | - |
| 1335 | + |
1336 | 1336 | for(IndexId iid : indexIdPool.values()){ |
1337 | 1337 | if(iid.isMySearch()) |
1338 | 1338 | ret.add(iid); |
1339 | 1339 | } |
1340 | | - |
| 1340 | + |
1341 | 1341 | return ret; |
1342 | 1342 | } |
1343 | 1343 | /** Get all dbnames that are locally indexed */ |
1344 | 1344 | public ArrayList<String> getMyIndexDBnames(){ |
1345 | 1345 | HashSet<String> dbnames = new HashSet<String>(); |
1346 | 1346 | ArrayList<String> dbnamesSorted = new ArrayList<String>(); |
1347 | | - |
| 1347 | + |
1348 | 1348 | for(IndexId iid : indexIdPool.values()){ |
1349 | 1349 | if(iid.isMyIndex() && !iid.isTitlesBySuffix() && !iid.isSpell()) |
1350 | 1350 | dbnames.add(iid.getDBname().toString()); |
— | — | @@ -1352,7 +1352,7 @@ |
1353 | 1353 | Collections.sort(dbnamesSorted); |
1354 | 1354 | return dbnamesSorted; |
1355 | 1355 | } |
1356 | | - |
| 1356 | + |
1357 | 1357 | /** Get the name of the localhost as it appears in global configuration */ |
1358 | 1358 | public String getLocalhost(){ |
1359 | 1359 | if(index.get(hostAddr) != null || search.get(hostAddr) != null) |
— | — | @@ -1370,7 +1370,7 @@ |
1371 | 1371 | public String getNamespacePrefixAll() { |
1372 | 1372 | return namespacePrefixAll; |
1373 | 1373 | } |
1374 | | - |
| 1374 | + |
1375 | 1375 | /** Check whether dbname has some of the suffixes */ |
1376 | 1376 | protected boolean checkSuffix(String[] suffixes, String dbname){ |
1377 | 1377 | if(suffixes == null) |
— | — | @@ -1383,32 +1383,32 @@ |
1384 | 1384 | } |
1385 | 1385 | return false; |
1386 | 1386 | } |
1387 | | - |
| 1387 | + |
1388 | 1388 | /** If dbname should have additional rank boost */ |
1389 | 1389 | public boolean useAdditionalRank(String dbname){ |
1390 | 1390 | return checkSuffix(additionalRank,dbname); |
1391 | 1391 | } |
1392 | | - |
| 1392 | + |
1393 | 1393 | /** Returns if keyword scoring should be used for this db, using |
1394 | 1394 | * the suffixes from the global configuration |
1395 | | - * |
| 1395 | + * |
1396 | 1396 | * @param dbname |
1397 | 1397 | * @return |
1398 | 1398 | */ |
1399 | 1399 | public boolean useKeywordScoring(String dbname){ |
1400 | | - return checkSuffix(keywordScoringSuffixes,dbname); |
| 1400 | + return checkSuffix(keywordScoringSuffixes,dbname); |
1401 | 1401 | } |
1402 | | - |
| 1402 | + |
1403 | 1403 | /** |
1404 | | - * If this dbname is assigned an exact-case additional index. |
1405 | | - * |
| 1404 | + * If this dbname is assigned an exact-case additional index. |
| 1405 | + * |
1406 | 1406 | * @param dbname |
1407 | 1407 | * @return |
1408 | 1408 | */ |
1409 | 1409 | public boolean exactCaseIndex(String dbname){ |
1410 | | - return checkSuffix(exactCaseSuffix,dbname); |
| 1410 | + return checkSuffix(exactCaseSuffix,dbname); |
1411 | 1411 | } |
1412 | | - |
| 1412 | + |
1413 | 1413 | /** Find (longest) suffix that matches dbname */ |
1414 | 1414 | public String findSuffix(String[] suffixes, String dbname){ |
1415 | 1415 | if(suffixes == null) |
— | — | @@ -1438,7 +1438,7 @@ |
1439 | 1439 | return ret; |
1440 | 1440 | return null; |
1441 | 1441 | } |
1442 | | - |
| 1442 | + |
1443 | 1443 | public AgeScaling getAgeScaling(String dbname){ |
1444 | 1444 | String strong = findSuffix(ageScalingStrong,dbname); |
1445 | 1445 | String medium = findSuffix(ageScalingMedium,dbname); |
— | — | @@ -1452,10 +1452,10 @@ |
1453 | 1453 | return AgeScaling.MEDIUM; |
1454 | 1454 | if(w>=s && s>=m) |
1455 | 1455 | return AgeScaling.WEAK; |
1456 | | - |
| 1456 | + |
1457 | 1457 | return AgeScaling.NONE; |
1458 | 1458 | } |
1459 | | - |
| 1459 | + |
1460 | 1460 | /** Get OAI-repo url for dbname */ |
1461 | 1461 | public String getOAIRepo(String dbname){ |
1462 | 1462 | String repo = null; |
— | — | @@ -1474,7 +1474,7 @@ |
1475 | 1475 | repo += "/"; |
1476 | 1476 | repo += "w/index.php"; // FIXME: we take this as generic path to index.php |
1477 | 1477 | } |
1478 | | - |
| 1478 | + |
1479 | 1479 | } |
1480 | 1480 | // get from global config |
1481 | 1481 | if(repo == null){ |
— | — | @@ -1485,13 +1485,13 @@ |
1486 | 1486 | repo = oaiRepo.get("<default>"); |
1487 | 1487 | } |
1488 | 1488 | if(repo == null) |
1489 | | - return ""; // failed, no url |
1490 | | - |
| 1489 | + return ""; // failed, no url |
| 1490 | + |
1491 | 1491 | // process $lang |
1492 | 1492 | String lang = getLanguage(dbname); |
1493 | 1493 | repo = repo.replace("$lang",lang.replace('_','-')); |
1494 | 1494 | repo = repo += "?title=Special:OAIRepository"; |
1495 | | - |
| 1495 | + |
1496 | 1496 | return repo; |
1497 | 1497 | } |
1498 | 1498 | |
— | — | @@ -1502,7 +1502,7 @@ |
1503 | 1503 | public static void setVerbose(boolean verbose) { |
1504 | 1504 | GlobalConfiguration.verbose = verbose; |
1505 | 1505 | } |
1506 | | - |
| 1506 | + |
1507 | 1507 | public NamespaceFilter getDefaultNamespace(IndexId iid){ |
1508 | 1508 | return getDefaultNamespace(iid.getDBname()); |
1509 | 1509 | } |
— | — | @@ -1515,7 +1515,7 @@ |
1516 | 1516 | } |
1517 | 1517 | return new NamespaceFilter(0); |
1518 | 1518 | } |
1519 | | - |
| 1519 | + |
1520 | 1520 | public NamespaceFilter getContentNamespaces(IndexId iid){ |
1521 | 1521 | return getContentNamespaces(iid.getDBname()); |
1522 | 1522 | } |
— | — | @@ -1528,7 +1528,7 @@ |
1529 | 1529 | } |
1530 | 1530 | return new NamespaceFilter(0); |
1531 | 1531 | } |
1532 | | - |
| 1532 | + |
1533 | 1533 | public NamespaceFilter getNamespacesWithSubpages(String dbname){ |
1534 | 1534 | if(wgNamespacesWithSubpages != null){ |
1535 | 1535 | if(wgNamespacesWithSubpages.containsKey(dbname)) |
— | — | @@ -1538,7 +1538,7 @@ |
1539 | 1539 | } |
1540 | 1540 | return new NamespaceFilter(2); |
1541 | 1541 | } |
1542 | | - |
| 1542 | + |
1543 | 1543 | public ArticleNamespaceScaling getNamespaceScaling(String dbname){ |
1544 | 1544 | if(namespaceBoost.containsKey(dbname)) |
1545 | 1545 | return namespaceBoost.get(dbname); |
— | — | @@ -1547,19 +1547,19 @@ |
1548 | 1548 | else |
1549 | 1549 | return new ArticleNamespaceScaling(new HashMap<Integer,Float>()); |
1550 | 1550 | } |
1551 | | - |
| 1551 | + |
1552 | 1552 | public HashSet<String> getSmartInterwikiCodes() { |
1553 | 1553 | return smartInterwikiCodes; |
1554 | 1554 | } |
1555 | | - |
| 1555 | + |
1556 | 1556 | public boolean hasCommonsWiki(){ |
1557 | 1557 | return commonsWiki != null; |
1558 | 1558 | } |
1559 | | - |
| 1559 | + |
1560 | 1560 | public IndexId getCommonsWiki(){ |
1561 | 1561 | return IndexId.get(commonsWiki); |
1562 | 1562 | } |
1563 | | - |
| 1563 | + |
1564 | 1564 | /** Get all searchers (NOTE: this is kind of slow...) */ |
1565 | 1565 | public HashSet<String> getAllSearchHosts(){ |
1566 | 1566 | HashSet<String> hosts = new HashSet<String>(); |
— | — | @@ -1577,11 +1577,11 @@ |
1578 | 1578 | public int getMaxSearchOffset() { |
1579 | 1579 | return maxSearchOffset; |
1580 | 1580 | } |
1581 | | - |
| 1581 | + |
1582 | 1582 | public String getIndexPath(){ |
1583 | 1583 | return indexPath; |
1584 | 1584 | } |
1585 | 1585 | |
1586 | | - |
1587 | 1586 | |
| 1587 | + |
1588 | 1588 | } |
\ No newline at end of file |
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/search/NamespaceFilter.java |
— | — | @@ -1,24 +1,24 @@ |
2 | 2 | /* |
3 | 3 | * Copyright 2005 Brion Vibber |
4 | | - * |
5 | | - * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 4 | + * |
| 5 | + * Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | 6 | * of this software and associated documentation files (the "Software"), to deal |
7 | | - * in the Software without restriction, including without limitation the rights |
8 | | - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
9 | | - * copies of the Software, and to permit persons to whom the Software is |
| 7 | + * in the Software without restriction, including without limitation the rights |
| 8 | + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 9 | + * copies of the Software, and to permit persons to whom the Software is |
10 | 10 | * furnished to do so, subject to the following conditions: |
11 | 11 | * |
12 | | - * The above copyright notice and this permission notice shall be included in |
| 12 | + * The above copyright notice and this permission notice shall be included in |
13 | 13 | * all copies or substantial portions of the Software. |
14 | 14 | * |
15 | | - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | | - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | | - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
18 | | - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 15 | + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
20 | 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
21 | 21 | * SOFTWARE. |
22 | | - * |
| 22 | + * |
23 | 23 | * $Id: NamespaceFilter.java 8398 2005-04-17 06:21:19Z vibber $ |
24 | 24 | */ |
25 | 25 | |
— | — | @@ -34,16 +34,16 @@ |
35 | 35 | * to filter */ |
36 | 36 | public class NamespaceFilter implements Serializable { |
37 | 37 | private BitSet included; |
38 | | - |
| 38 | + |
39 | 39 | protected void init(){ |
40 | 40 | included = new BitSet(64); |
41 | 41 | } |
42 | | - |
| 42 | + |
43 | 43 | /** "all" filter */ |
44 | 44 | public NamespaceFilter() { |
45 | 45 | init(); |
46 | 46 | } |
47 | | - |
| 47 | + |
48 | 48 | /** filter namespaces */ |
49 | 49 | public NamespaceFilter(Collection<Integer> namespaces){ |
50 | 50 | init(); |
— | — | @@ -66,7 +66,7 @@ |
67 | 67 | } |
68 | 68 | } |
69 | 69 | } |
70 | | - |
| 70 | + |
71 | 71 | /** Decompose this filter into an array of single-namespace filters, do OR to construct */ |
72 | 72 | public ArrayList<NamespaceFilter> decompose(){ |
73 | 73 | ArrayList<NamespaceFilter> dec = new ArrayList<NamespaceFilter>(); |
— | — | @@ -75,7 +75,7 @@ |
76 | 76 | } |
77 | 77 | return dec; |
78 | 78 | } |
79 | | - |
| 79 | + |
80 | 80 | public HashSet<Integer> getNamespaces(){ |
81 | 81 | HashSet<Integer> ret = new HashSet<Integer>(); |
82 | 82 | if(included.cardinality() == 0) |
— | — | @@ -85,7 +85,7 @@ |
86 | 86 | } |
87 | 87 | return ret; |
88 | 88 | } |
89 | | - |
| 89 | + |
90 | 90 | public ArrayList<Integer> getNamespacesOrdered(){ |
91 | 91 | ArrayList<Integer> ret = new ArrayList<Integer>(); |
92 | 92 | if(included.cardinality() == 0) |
— | — | @@ -95,36 +95,36 @@ |
96 | 96 | } |
97 | 97 | return ret; |
98 | 98 | } |
99 | | - |
| 99 | + |
100 | 100 | public boolean filter(String namespace) { |
101 | 101 | return filter(Integer.parseInt(namespace)); |
102 | 102 | } |
103 | | - |
| 103 | + |
104 | 104 | public boolean filter(int namespace) { |
105 | 105 | return included.get(namespace); |
106 | 106 | } |
107 | | - |
| 107 | + |
108 | 108 | /** Set bit for namespace to true */ |
109 | 109 | public void set(int namespace){ |
110 | 110 | included.set(namespace); |
111 | 111 | } |
112 | | - |
| 112 | + |
113 | 113 | /** Set bit for namespace to false */ |
114 | 114 | public void unset(int namespace){ |
115 | 115 | included.set(namespace,false); |
116 | 116 | } |
117 | | - |
| 117 | + |
118 | 118 | public boolean contains(int namespace){ |
119 | 119 | if(namespace < 0) |
120 | 120 | return false; |
121 | 121 | else |
122 | 122 | return included.get(namespace); |
123 | 123 | } |
124 | | - |
| 124 | + |
125 | 125 | public boolean contains(String namespace){ |
126 | 126 | return contains(Integer.parseInt(namespace)); |
127 | 127 | } |
128 | | - |
| 128 | + |
129 | 129 | public BitSet getIncluded() { |
130 | 130 | return included; |
131 | 131 | } |
— | — | @@ -132,16 +132,16 @@ |
133 | 133 | public int cardinality(){ |
134 | 134 | return included.cardinality(); |
135 | 135 | } |
136 | | - |
| 136 | + |
137 | 137 | public int getNamespace(){ |
138 | | - return included.nextSetBit(0); |
| 138 | + return included.nextSetBit(0); |
139 | 139 | } |
140 | | - |
| 140 | + |
141 | 141 | /** if empty filter ("all" keyword) */ |
142 | 142 | public boolean isAll(){ |
143 | | - return cardinality() == 0; |
| 143 | + return cardinality() == 0; |
144 | 144 | } |
145 | | - |
| 145 | + |
146 | 146 | @Override |
147 | 147 | public String toString() { |
148 | 148 | return included.toString(); |
— | — | @@ -173,5 +173,5 @@ |
174 | 174 | } |
175 | 175 | |
176 | 176 | |
177 | | - |
| 177 | + |
178 | 178 | } |
Index: trunk/lucene-search-2/src/org/wikimedia/lsearch/util/UnicodeDecomposer.java |
— | — | @@ -13,11 +13,11 @@ |
14 | 14 | import org.wikimedia.lsearch.config.Configuration; |
15 | 15 | import org.wikimedia.lsearch.index.IndexThread; |
16 | 16 | /** |
17 | | - * Implements a simplistic unicode decomposer. By default will use |
| 17 | + * Implements a simplistic unicode decomposer. By default will use |
18 | 18 | * unicode data from lib/UnicodeData.txt. The decomposer attempts |
19 | 19 | * to decompose every character into compatible letters, for instance |
20 | 20 | * š will be decomposed to s, while the ligature ﬁ will be decomposed into f and i. |
21 | | - * |
| 21 | + * |
22 | 22 | * @author rainman |
23 | 23 | * |
24 | 24 | */ |
— | — | @@ -33,13 +33,13 @@ |
34 | 34 | if(len<buffer.length) |
35 | 35 | buffer[len++] = ch; |
36 | 36 | } |
37 | | - |
| 37 | + |
38 | 38 | } |
39 | 39 | static org.apache.log4j.Logger log = Logger.getLogger(UnicodeDecomposer.class); |
40 | 40 | final protected static char[][] decomposition = new char[65536][]; |
41 | 41 | final protected static boolean[] combining = new boolean[65536]; |
42 | 42 | protected static UnicodeDecomposer instance = null; |
43 | | - |
| 43 | + |
44 | 44 | /** |
45 | 45 | * Get decomposing <b>letter</b> characters |
46 | 46 | * @param ch |
— | — | @@ -48,17 +48,17 @@ |
49 | 49 | public char[] decompose(char ch){ |
50 | 50 | return decomposition[ch]; |
51 | 51 | } |
52 | | - |
| 52 | + |
53 | 53 | protected UnicodeDecomposer(String resource){ |
54 | 54 | initFromResource(resource); |
55 | 55 | log.debug("Loaded unicode decomposer"); |
56 | 56 | } |
57 | | - |
58 | | - |
| 57 | + |
| 58 | + |
59 | 59 | public boolean isCombiningChar(char ch){ |
60 | 60 | return combining[ch]; |
61 | 61 | } |
62 | | - |
| 62 | + |
63 | 63 | /** |
64 | 64 | * Get singleton instance of the Unicode decomposer class. |
65 | 65 | * Loads lib/UnicodeData.txt on first call |
— | — | @@ -68,10 +68,10 @@ |
69 | 69 | if(instance == null){ |
70 | 70 | instance = new UnicodeDecomposer("/UnicodeData.txt"); |
71 | 71 | } |
72 | | - |
| 72 | + |
73 | 73 | return instance; |
74 | 74 | } |
75 | | - |
| 75 | + |
76 | 76 | protected void initFromResource(String resource){ |
77 | 77 | BitSet letters = new BitSet(65536); |
78 | 78 | try { |
— | — | @@ -89,30 +89,30 @@ |
90 | 90 | continue; // ignore any additional chars |
91 | 91 | if(parts[2].charAt(0) == 'L') |
92 | 92 | letters.set(chVal); |
93 | | - |
| 93 | + |
94 | 94 | if(parts[2].charAt(0) == 'M') |
95 | 95 | combining[chVal] = true; |
96 | 96 | else |
97 | 97 | combining[chVal] = false; |
98 | 98 | } |
99 | 99 | in.close(); |
100 | | - |
| 100 | + |
101 | 101 | // add some exception requested by users |
102 | 102 | // yiddish stuffs |
103 | 103 | combining[0x05B7] = true; |
104 | 104 | combining[0x05B8] = true; |
105 | 105 | combining[0x05BC] = true; |
106 | 106 | combining[0x05BF] = true; |
107 | | - |
| 107 | + |
108 | 108 | // decomposition table |
109 | 109 | char[][] table = new char[65536][]; |
110 | | - |
| 110 | + |
111 | 111 | // default for all chars: no decomposition |
112 | 112 | for(int ich = 0; ich <= 0xFFFF; ich++){ |
113 | 113 | decomposition[ich]=null; |
114 | 114 | table[ich]=null; |
115 | 115 | } |
116 | | - |
| 116 | + |
117 | 117 | // second pass, make the decomposition table |
118 | 118 | in = new BufferedReader(new InputStreamReader(UnicodeDecomposer.class.getResourceAsStream(resource))); |
119 | 119 | while((line = in.readLine()) != null){ |
— | — | @@ -138,32 +138,32 @@ |
139 | 139 | table[ch]= new char[len]; |
140 | 140 | for(i=0;i<len;i++) |
141 | 141 | table[ch][i] = buf[i]; |
142 | | - } |
143 | | - } |
| 142 | + } |
| 143 | + } |
144 | 144 | } |
145 | | - |
| 145 | + |
146 | 146 | // some decomposition exceptions |
147 | 147 | // yiddish stuffs |
148 | 148 | table[0x05F0] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE VAV |
149 | 149 | table[0x05F0][0] = 0x05D5; |
150 | 150 | table[0x05F0][1] = 0x05D5; |
151 | | - |
| 151 | + |
152 | 152 | table[0x05F1] = new char[2]; // HEBREW LIGATURE YIDDISH VAV YOD |
153 | 153 | table[0x05F1][0] = 0x05D5; |
154 | 154 | table[0x05F1][1] = 0x05D9; |
155 | | - |
| 155 | + |
156 | 156 | table[0x05F2] = new char[2]; // HEBREW LIGATURE YIDDISH DOUBLE YOD |
157 | 157 | table[0x05F2][0] = 0x05D9; |
158 | 158 | table[0x05F2][1] = 0x05D9; |
159 | | - |
| 159 | + |
160 | 160 | table[0xFB1F] = new char[2]; // HEBREW LIGATURE YIDDISH YOD YOD PATAH |
161 | 161 | table[0xFB1F][0] = 0x05D9; |
162 | 162 | table[0xFB1F][1] = 0x05D9; |
163 | | - |
| 163 | + |
164 | 164 | table[0xFB1D] = new char[1]; // HEBREW LETTER YOD WITH HIRIQ |
165 | 165 | table[0xFB1D][0] = 0x05D9; |
166 | | - |
167 | | - |
| 166 | + |
| 167 | + |
168 | 168 | // using decomposition table recursively decompose characters |
169 | 169 | for(int ich = 0; ich <= 0xFFFF; ich++){ |
170 | 170 | if(table[ich]==null) |
— | — | @@ -174,7 +174,7 @@ |
175 | 175 | decomposition[ich]= new char[buffer.len]; |
176 | 176 | for(i=0;i<buffer.len;i++) |
177 | 177 | decomposition[ich][i] = buffer.buffer[i]; |
178 | | - } |
| 178 | + } |
179 | 179 | } |
180 | 180 | in.close(); |
181 | 181 | } catch (IOException e) { |
— | — | @@ -188,7 +188,7 @@ |
189 | 189 | |
190 | 190 | /** |
191 | 191 | * Depth-first recursion, gradually decompose characters (if it has many diacritics) |
192 | | - * |
| 192 | + * |
193 | 193 | * @param buf - buffer where to write resulting decompositions |
194 | 194 | * @param table - mapping char -> decomposing letters |
195 | 195 | * @param letters - bitset of letter characters |