Index: branches/lucene-search-2.1/src/org/apache/lucene/analysis/KStemData7.java |
— | — | @@ -231,7 +231,7 @@ |
232 | 232 | "shameless","shammy","shampoo","shamrock","shandy",
|
233 | 233 | "shanghai","shank","shantung","shanty","shantytown",
|
234 | 234 | "shape","shaped","shapely","shard","share",
|
235 | | -"sharecropper","shareholder","shares","shark","sharkskin",
|
| 235 | +"sharecropper","shareholder","shark","sharkskin",
|
236 | 236 | "sharp","sharpen","sharpener","sharper","sharpshooter",
|
237 | 237 | "shatter","shave","shaver","shaving","shawl",
|
238 | 238 | "shay","she","sheaf","shear","shears",
|
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/IndexDaemon.java |
— | — | @@ -62,7 +62,7 @@ |
63 | 63 | snapshotPrecursors("","true"); |
64 | 64 | } |
65 | 65 | public void snapshotPrecursors(String pattern){ |
66 | | - indexer.makeSnapshotsNow(false,pattern,true); |
| 66 | + indexer.makeSnapshotsNow(true,pattern,true); |
67 | 67 | } |
68 | 68 | |
69 | 69 | public void snapshotPrecursors(String pattern, String optimize){ |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerImpl.java |
— | — | @@ -61,11 +61,16 @@ |
62 | 62 | long[] timestamps = new long[dbroles.length]; |
63 | 63 | int i=0; |
64 | 64 | for(String dbrole : dbroles){ |
65 | | - LocalIndex li = indexRegistry.getLatestSnapshot(IndexId.get(dbrole)); |
66 | | - if(li != null) |
67 | | - timestamps[i++] = li.timestamp; |
68 | | - else |
| 65 | + try{ |
| 66 | + LocalIndex li = indexRegistry.getLatestSnapshot(IndexId.get(dbrole)); |
| 67 | + if(li != null) |
| 68 | + timestamps[i++] = li.timestamp; |
| 69 | + else |
| 70 | + timestamps[i++] = 0; |
| 71 | + } catch(RuntimeException e){ |
| 72 | + log.warn("Error getting snapshot for index "+dbrole, e); |
69 | 73 | timestamps[i++] = 0; |
| 74 | + } |
70 | 75 | } |
71 | 76 | log.debug(" <-/ replying: "+Arrays.toString(timestamps)); |
72 | 77 | return timestamps; |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/UpdateThread.java |
— | — | @@ -271,7 +271,7 @@ |
272 | 272 | // rsync |
273 | 273 | log.info("Starting rsync of "+iid); |
274 | 274 | String snapshotpath = iid.getRsyncSnapshotPath()+"/"+li.timestamp; |
275 | | - Command.exec(rsyncPath+" "+rsyncParams+" -W --delete -r rsync://"+iid.getIndexHost()+snapshotpath+" "+iid.getUpdatePath()); |
| 275 | + Command.exec(rsyncPath+" "+rsyncParams+" -W --delete -u -t -r rsync://"+iid.getIndexHost()+snapshotpath+" "+iid.getUpdatePath()); |
276 | 276 | log.info("Finished rsync of "+iid+" in "+(System.currentTimeMillis()-startTime)+" ms"); |
277 | 277 | |
278 | 278 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/search/SearcherCache.java |
— | — | @@ -121,7 +121,8 @@ |
122 | 122 | } |
123 | 123 | for(int i=0;i<reader.maxDoc();i++){ |
124 | 124 | for(CacheBuilder b : builders){ |
125 | | - b.cache(i,reader.document(i)); |
| 125 | + if( !reader.isDeleted(i) ) |
| 126 | + b.cache(i,reader.document(i)); |
126 | 127 | } |
127 | 128 | } |
128 | 129 | for(CacheBuilder b : builders){ |
— | — | @@ -504,12 +505,20 @@ |
505 | 506 | ArrayList<InitialDeployer> threads = new ArrayList<InitialDeployer>(); |
506 | 507 | |
507 | 508 | // divide mys list into chunks and assign them to different worker threads |
508 | | - int inc = mys.size() / threadNum + 1; |
509 | | - int start = 0; |
| 509 | + float inc = (float)mys.size() / threadNum; |
| 510 | + if( inc < 1 ) |
| 511 | + inc = 1; |
| 512 | + float start = 0; |
510 | 513 | for(int i=0;i<threadNum;i++){ |
511 | | - threads.add(new InitialDeployer( |
512 | | - mys.subList(start, Math.min(start+inc, mys.size())))); |
| 514 | + int end = Math.min((int)(start+inc), mys.size()); |
| 515 | + if( i == threadNum-1 ) |
| 516 | + end = mys.size(); // take rest of the list |
| 517 | + |
| 518 | + threads.add(new InitialDeployer( mys.subList((int)(start), end) )); |
513 | 519 | start += inc; |
| 520 | + // config error, too many threads |
| 521 | + if( start >= mys.size()) |
| 522 | + break; |
514 | 523 | } |
515 | 524 | |
516 | 525 | // start all threads |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/IndexThread.java |
— | — | @@ -106,13 +106,13 @@ |
107 | 107 | boolean optimize = true; |
108 | 108 | |
109 | 109 | public Pattern(boolean optimize, String pattern, boolean forPrecursors){ |
110 | | - this(pattern,forPrecursors,false); |
111 | | - this.optimize = optimize; |
| 110 | + this(pattern,forPrecursors,false,optimize); |
112 | 111 | } |
113 | | - public Pattern(String pattern, boolean forPrecursors, boolean not){ |
| 112 | + public Pattern(String pattern, boolean forPrecursors, boolean not, boolean optimize){ |
114 | 113 | this.pattern = pattern; |
115 | 114 | this.forPrecursors = forPrecursors; |
116 | 115 | this.not = not; |
| 116 | + this.optimize = optimize; |
117 | 117 | } |
118 | 118 | @Override |
119 | 119 | public int hashCode() { |
— | — | @@ -218,7 +218,7 @@ |
219 | 219 | ArrayList<Pattern> rawPatterns = new ArrayList<Pattern>(); |
220 | 220 | synchronized (snapshotPatterns) { |
221 | 221 | for(Pattern p : snapshotPatterns){ // convert wildcards into regexp |
222 | | - pat.add(new Pattern(StringUtils.wildcardToRegexp(p.pattern),p.forPrecursors,p.pattern.startsWith("^"))); |
| 222 | + pat.add(new Pattern(StringUtils.wildcardToRegexp(p.pattern),p.forPrecursors,p.pattern.startsWith("^"),p.optimize)); |
223 | 223 | rawPatterns.add(p); |
224 | 224 | } |
225 | 225 | snapshotPatterns.clear(); |
— | — | @@ -247,11 +247,13 @@ |
248 | 248 | try{ |
249 | 249 | if(iid.isLogical()) |
250 | 250 | continue; |
251 | | - if(matchesPattern(pat,iid)){ |
| 251 | + Pattern p = matchesPattern(pat,iid); |
| 252 | + if( p != null){ |
252 | 253 | // enforce outer transaction lock to connect optimization & snapshot |
253 | 254 | lock = iid.getTransactionLock(IndexId.Transaction.INDEX); |
254 | 255 | lock.lock(); |
255 | | - optimizeIndex(iid); |
| 256 | + if( p.optimize ) |
| 257 | + optimizeIndex(iid); |
256 | 258 | makeIndexSnapshot(iid,iid.getIndexPath()); |
257 | 259 | lock.unlock(); |
258 | 260 | lock = null; |
— | — | @@ -269,7 +271,7 @@ |
270 | 272 | for( IndexId iid : indexes ){ |
271 | 273 | if(iid.isLogical() || badOptimization.contains(iid)) |
272 | 274 | continue; |
273 | | - if(matchesPattern(pat,iid)){ |
| 275 | + if(matchesPattern(pat,iid) != null){ |
274 | 276 | |
275 | 277 | registry.refreshSnapshots(iid); |
276 | 278 | } |
— | — | @@ -281,16 +283,17 @@ |
282 | 284 | } |
283 | 285 | } |
284 | 286 | |
285 | | - private boolean matchesPattern(ArrayList<Pattern> pat, IndexId iid) { |
| 287 | + /** Returns the matching pattern or null if none is matching */ |
| 288 | + private Pattern matchesPattern(ArrayList<Pattern> pat, IndexId iid) { |
286 | 289 | String string = iid.toString(); |
287 | 290 | for(Pattern p : pat){ |
288 | 291 | if((iid.isPrecursor() && !p.forPrecursors) ||(!iid.isPrecursor() && p.forPrecursors)) |
289 | 292 | continue; |
290 | 293 | boolean match = p.pattern.equals("")? true : string.matches(p.pattern); |
291 | 294 | if((match && !p.not) || (!match && p.not)) |
292 | | - return true; |
| 295 | + return p; |
293 | 296 | } |
294 | | - return false; |
| 297 | + return null; |
295 | 298 | } |
296 | 299 | |
297 | 300 | public static void makeIndexSnapshot(IndexId iid, String indexPath){ |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/analyzers/WikiQueryParser.java |
— | — | @@ -644,7 +644,7 @@ |
645 | 645 | // check if it's a valid field |
646 | 646 | String f = new String(buffer,0,length); |
647 | 647 | |
648 | | - List fieldOperators = getFieldOperators(); |
| 648 | + List<String> fieldOperators = getFieldOperators(); |
649 | 649 | |
650 | 650 | if( f.equals(namespaceAllKeyword) |
651 | 651 | || fieldOperators.contains(f) |
— | — | @@ -661,8 +661,8 @@ |
662 | 662 | return TokenType.WORD; |
663 | 663 | } |
664 | 664 | |
665 | | - private List getFieldOperators() { |
666 | | - List fieldOperators = new ArrayList(); |
| 665 | + private List<String> getFieldOperators() { |
| 666 | + List<String> fieldOperators = new ArrayList<String>(); |
667 | 667 | fieldOperators.add("intitle"); |
668 | 668 | fieldOperators.add("incategory"); |
669 | 669 | fieldOperators.add("inthread"); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IncrementalUpdater.java |
— | — | @@ -102,6 +102,9 @@ |
103 | 103 | HashSet<String> firstPass = new HashSet<String>(); // if dbname is here, then it's our update pass |
104 | 104 | String defaultTimestamp = "2001-01-01"; |
105 | 105 | boolean errors = false; |
| 106 | + boolean requestSnapshot = false; |
| 107 | + String noOptimizationDBlistFile = null; |
| 108 | + HashSet<String> noOptimizationDBs = new HashSet<String>(); |
106 | 109 | |
107 | 110 | // args |
108 | 111 | for(int i=0; i<args.length; i++){ |
— | — | @@ -123,6 +126,10 @@ |
124 | 127 | excludeFile = args[++i]; |
125 | 128 | else if(args[i].equals("-n")) |
126 | 129 | notification = true; |
| 130 | + else if(args[i].equals("-sn")) |
| 131 | + requestSnapshot = true; |
| 132 | + else if(args[i].equals("-nof")) |
| 133 | + noOptimizationDBlistFile = args[++i]; |
127 | 134 | else if(args[i].equals("--help")) |
128 | 135 | break; |
129 | 136 | else if(args[i].startsWith("-")){ |
— | — | @@ -135,6 +142,9 @@ |
136 | 143 | dbnames.addAll(global.getMyIndexDBnames()); |
137 | 144 | dbnames.addAll(readDBList(dblist)); |
138 | 145 | excludeList.addAll(readDBList(excludeFile)); |
| 146 | + if( noOptimizationDBlistFile != null) |
| 147 | + noOptimizationDBs.addAll(readDBList(noOptimizationDBlistFile)); |
| 148 | + |
139 | 149 | if(dbnames.size() == 0){ |
140 | 150 | System.out.println("Syntax: java IncrementalUpdater [-d] [-s sleep] [-t timestamp] [-e dbname] [-f dblist] [-n] [--no-ranks] dbname1 dbname2 ..."); |
141 | 151 | System.out.println("Options:"); |
— | — | @@ -147,7 +157,8 @@ |
148 | 158 | System.out.println(" -n - wait for notification of flush after done updating one db (default: "+notification+")"); |
149 | 159 | System.out.println(" -e - exclude dbname from incremental updates (overrides -f)"); |
150 | 160 | System.out.println(" -ef - exclude db names listed in dblist file"); |
151 | | - |
| 161 | + System.out.println(" -sn - immediately make unoptimized snapshot as updates finish "); |
| 162 | + System.out.println(" -nof - use with -sn to specify a file with databases not to be optimized"); |
152 | 163 | return; |
153 | 164 | } |
154 | 165 | // preload |
— | — | @@ -232,10 +243,10 @@ |
233 | 244 | String host = iid.getIndexHost(); |
234 | 245 | boolean req = messenger.requestFlushAndNotify(dbname,host); |
235 | 246 | if(req){ |
236 | | - log.info("Waiting for flush notification"); |
| 247 | + log.info("Waiting for flush notification for "+dbname); |
237 | 248 | Boolean succ = null; |
238 | 249 | do{ |
239 | | - Thread.sleep(3000); |
| 250 | + Thread.sleep(1500); |
240 | 251 | succ = messenger.isSuccessfulFlush(dbname,host); |
241 | 252 | if(succ != null){ |
242 | 253 | if(succ){ |
— | — | @@ -248,6 +259,21 @@ |
249 | 260 | } |
250 | 261 | } |
251 | 262 | } while(succ == null); |
| 263 | + if(requestSnapshot){ |
| 264 | + boolean optimize = !noOptimizationDBs.contains(dbname); |
| 265 | + // snapshot the content and highlight indexes without optimizing them |
| 266 | + String p = dbname+"|"+dbname+".pa*|"+dbname+".ns*|"+dbname+".h*"; |
| 267 | + messenger.requestSnapshotAndNotify(host, optimize, p, false); |
| 268 | + log.info("Waiting for snapshot notification for "+dbname); |
| 269 | + while( !messenger.snapshotFinished(host,optimize,p,false) ){ |
| 270 | + try { |
| 271 | + Thread.sleep(1500); |
| 272 | + } catch (InterruptedException e) { |
| 273 | + log.warn("Interrupted", e); |
| 274 | + } |
| 275 | + } |
| 276 | + log.info("Snapshot of "+dbname+" successful"); |
| 277 | + } |
252 | 278 | } else |
253 | 279 | continue main_loop; |
254 | 280 | } |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/OAIHarvester.java |
— | — | @@ -58,6 +58,7 @@ |
59 | 59 | } |
60 | 60 | |
61 | 61 | protected void read(URL url) throws IOException { |
| 62 | + log.info("Reading records from "+url); |
62 | 63 | collector = new IndexUpdatesCollector(iid); |
63 | 64 | InputStream in = new BufferedInputStream(url.openStream()); |
64 | 65 | parser = new OAIParser(in,collector); |
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/util/Configure.java |
— | — | @@ -84,7 +84,7 @@ |
85 | 85 | return Command.exec(new String[] { |
86 | 86 | "/bin/bash", |
87 | 87 | "-c", |
88 | | - "cd "+mediawiki+" && (echo \"return \\$"+var+"\" | php maintenance/eval.php)"}).trim(); |
| 88 | + "cd "+mediawiki+" && (echo \"return \\$"+var+"\" | php maintenance/eval.php | sed -e 's/^> // ; /^$/d')"}).trim(); |
89 | 89 | } |
90 | 90 | |
91 | 91 | /** create config file from template, replacing variables |
Index: branches/lucene-search-2.1/lib/mwdumper.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |