r41273 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r41272 | r41273 | r41274 >
Date:22:48, 25 September 2008
Author:rainman
Status:old
Tags:
Comment:
Incremental updater retest:
* allow the incremental updater to start a new database from scratch
* always report localization directly to the indexer
Modified paths:
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchDaemon.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessenger.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerClient.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerImpl.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IndexUpdatesCollector.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/Links.java (modified) (history)
  • /branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexWriter.java (modified) (history)

Diff [purge]

Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/frontend/SearchDaemon.java
@@ -373,7 +373,7 @@
374374 */
375375 private void sendResultLine(double score, String namespace, String title) {
376376 try{
377 - sendOutputLine(score + " " + namespace + " " + encodeTitle(title));
 377+ sendOutputLine((float)score + " " + namespace + " " + encodeTitle(title));
378378 } catch(Exception e){
379379 log.error("Error sending result line ("+score + " " + namespace + " " + title +"): "+e.getMessage());
380380 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/spell/CleanIndexWriter.java
@@ -91,16 +91,20 @@
9292 Transaction trans = new Transaction(iid, IndexId.Transaction.INDEX);
9393 trans.begin();
9494 try{
95 - IndexReader reader = IndexReader.open(iid.getIndexPath());
96 - // batch delete
97 - for(IndexUpdateRecord rec : records){
98 - if(rec.doDelete()){
99 - Article a = rec.getArticle();
100 - log.debug(iid+": Deleting "+a);
101 - reader.deleteDocuments(new Term("key",rec.getIndexKey()));
 95+ try{
 96+ IndexReader reader = IndexReader.open(iid.getIndexPath());
 97+ // batch delete
 98+ for(IndexUpdateRecord rec : records){
 99+ if(rec.doDelete()){
 100+ Article a = rec.getArticle();
 101+ log.debug(iid+": Deleting "+a);
 102+ reader.deleteDocuments(new Term("key",rec.getIndexKey()));
 103+ }
102104 }
 105+ reader.close();
 106+ } catch(Exception e){
 107+ log.warn("Error opening for batch update read "+iid+" : "+e.getMessage());
103108 }
104 - reader.close();
105109 // batch add
106110 openWriter(iid.getIndexPath(),false);
107111 for(IndexUpdateRecord rec : records){
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerClient.java
@@ -11,6 +11,7 @@
1212 import java.util.HashMap;
1313 import java.util.HashSet;
1414 import java.util.Hashtable;
 15+import java.util.Map;
1516 import java.util.Set;
1617
1718 import org.apache.log4j.Logger;
@@ -395,4 +396,15 @@
396397 }
397398 return false;
398399 }
 400+
 401+ public void addLocalizationCustomMapping(String host, Map<Integer,String> namespaceIndexToName, String dbname) throws RemoteException {
 402+ RMIMessenger r;
 403+ try {
 404+ r = messengerFromCache(host);
 405+ r.addLocalizationCustomMapping(namespaceIndexToName, dbname);
 406+ } catch(NotBoundException e){
 407+ e.printStackTrace();
 408+ log.error("Messenger not bound: "+e.getMessage());
 409+ }
 410+ }
399411 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessenger.java
@@ -6,6 +6,7 @@
77 import java.util.ArrayList;
88 import java.util.HashMap;
99 import java.util.HashSet;
 10+import java.util.Map;
1011
1112 import org.apache.lucene.analysis.Token;
1213 import org.apache.lucene.index.Term;
@@ -245,4 +246,13 @@
246247 * @return
247248 */
248249 public boolean snapshotFinished(boolean optimize, String pattern, boolean forPrecursor) throws RemoteException;
 250+
 251+ /**
 252+ * Call on indexer to remotely add custom localized names for OAI header site info
 253+ *
 254+ * @param namespaceIndexToName
 255+ * @param dbname
 256+ * @throws RemoteException
 257+ */
 258+ public void addLocalizationCustomMapping(Map<Integer,String> namespaceIndexToName, String dbname) throws RemoteException;
249259 }
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/interoperability/RMIMessengerImpl.java
@@ -6,6 +6,8 @@
77 import java.util.Arrays;
88 import java.util.HashMap;
99 import java.util.HashSet;
 10+import java.util.Map;
 11+import java.util.Map.Entry;
1012
1113 import org.apache.log4j.Logger;
1214 import org.apache.lucene.analysis.Token;
@@ -37,6 +39,7 @@
3840 import org.wikimedia.lsearch.spell.SuggestQuery;
3941 import org.wikimedia.lsearch.spell.SuggestResult;
4042 import org.wikimedia.lsearch.spell.SuggestSimilar;
 43+import org.wikimedia.lsearch.util.Localization;
4144
4245 /** Local implementation for {@link RMIMessenger} */
4346 public class RMIMessengerImpl implements RMIMessenger {
@@ -232,6 +235,13 @@
233236 return indexThread.snapshotFinished(optimize,pattern,forPrecursor);
234237 }
235238
 239+ public void addLocalizationCustomMapping(Map<Integer, String> namespaceIndexToName, String dbname) throws RemoteException {
 240+ for(Entry<Integer,String> e : namespaceIndexToName.entrySet()){
 241+ Localization.addCustomMapping(e.getValue(),e.getKey(),dbname);
 242+ }
 243+
 244+ }
 245+
236246 protected RMIMessengerImpl(){
237247 networkStatus = null;
238248 indexRegistry = null;
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/prefix/PrefixIndexBuilder.java
@@ -315,6 +315,7 @@
316316 Transaction trans = new Transaction(pre, IndexId.Transaction.INDEX);
317317 trans.begin();
318318 try{
 319+ try{
319320 IndexReader reader = IndexReader.open(pre.getIndexPath());
320321 // batch delete
321322 for(IndexUpdateRecord rec : records){
@@ -325,6 +326,9 @@
326327 }
327328 }
328329 reader.close();
 330+ } catch(Exception e){
 331+ log.warn("Error while opening prefix precursor "+pre+" : "+e.getMessage());
 332+ }
329333 // batch add
330334 writer = WikiIndexModifier.openForWrite(pre.getIndexPath(),false,new PrefixAnalyzer());
331335 initWriter(writer);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/Transaction.java
@@ -147,12 +147,14 @@
148148 * Commit changes to index.
149149 */
150150 public void commit(){
 151+ boolean wasInTransaction = inTransaction;
151152 try{
152153 cleanup();
153154 inTransaction = false;
154155 log.info("Successfully commited changes on "+iid);
155156 } finally{
156 - lock.unlock();
 157+ if(wasInTransaction)
 158+ lock.unlock();
157159 }
158160 }
159161
@@ -160,6 +162,7 @@
161163 * Rollback changes to index. Returns to previous consistent state.
162164 */
163165 public void rollback(){
 166+ boolean wasInTransaction = inTransaction;
164167 try{
165168 if(inTransaction){
166169 recover();
@@ -167,7 +170,8 @@
168171 log.info("Succesfully rollbacked changes on "+iid);
169172 }
170173 } finally{
171 - lock.unlock();
 174+ if(wasInTransaction)
 175+ lock.unlock();
172176 }
173177 }
174178
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/index/WikiIndexModifier.java
@@ -238,13 +238,15 @@
239239 public static IndexWriter openForWrite(String path, boolean rewrite) throws IOException{
240240 return openForWrite(path,rewrite,null);
241241 }
 242+
242243 public static IndexWriter openForWrite(String path, boolean rewrite, Analyzer analyzer) throws IOException{
243244 try {
244245 return new IndexWriter(path,analyzer,rewrite);
245246 } catch (IOException e) {
246247 try {
247248 // unlock, retry
248 - if(!new File(path).exists()){
 249+ File target = new File(path);
 250+ if(!target.exists() || (target.isDirectory() && target.listFiles().length==0)){
249251 // try to make brand new index
250252 makeDBPath(path); // ensure all directories are made
251253 log.info("Making new index at path "+path);
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/IndexUpdatesCollector.java
@@ -3,6 +3,7 @@
44 import java.io.IOException;
55 import java.util.ArrayList;
66 import java.util.Date;
 7+import java.util.HashMap;
78 import java.util.Hashtable;
89 import java.util.Iterator;
910 import java.util.Map.Entry;
@@ -18,6 +19,7 @@
1920 import org.wikimedia.lsearch.config.GlobalConfiguration;
2021 import org.wikimedia.lsearch.config.IndexId;
2122 import org.wikimedia.lsearch.index.IndexUpdateRecord;
 23+import org.wikimedia.lsearch.interoperability.RMIMessengerClient;
2224 import org.wikimedia.lsearch.ranks.LinksBuilder;
2325 import org.wikimedia.lsearch.related.Related;
2426 import org.wikimedia.lsearch.related.RelatedTitle;
@@ -86,12 +88,15 @@
8789
8890 public void writeSiteinfo(Siteinfo info) throws IOException {
8991 this.info = info;
 92+ RMIMessengerClient messenger = new RMIMessengerClient(true);
9093 // write to localization
 94+ HashMap<Integer,String> map = new HashMap<Integer,String>();
9195 Iterator it = info.Namespaces.orderedEntries();
9296 while(it.hasNext()){
9397 Entry<Integer,String> pair = (Entry<Integer,String>)it.next();
94 - Localization.addCustomMapping(pair.getValue(),pair.getKey(),iid.getDBname());
 98+ map.put(pair.getKey(),pair.getValue());
9599 }
 100+ messenger.addLocalizationCustomMapping(iid.getIndexHost(),map,iid.getDBname());
96101 }
97102
98103 public void close() throws IOException {
Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/ranks/Links.java
@@ -2,6 +2,7 @@
33
44 import java.io.ByteArrayInputStream;
55 import java.io.ByteArrayOutputStream;
 6+import java.io.File;
67 import java.io.IOException;
78 import java.io.ObjectInputStream;
89 import java.io.ObjectOutputStream;
@@ -40,6 +41,7 @@
4142 import org.apache.lucene.search.PhraseQuery;
4243 import org.apache.lucene.search.TermQuery;
4344 import org.apache.lucene.store.Directory;
 45+import org.apache.lucene.store.FSDirectory;
4446 import org.apache.lucene.store.RAMDirectory;
4547 import org.wikimedia.lsearch.analyzers.FilterFactory;
4648 import org.wikimedia.lsearch.analyzers.PrefixAnalyzer;
@@ -224,7 +226,7 @@
225227 if(writer == null){
226228 if(directory == null)
227229 throw new RuntimeException("Opened for read, but trying to write");
228 - writer = new IndexWriter(directory,new SimpleAnalyzer(),false);
 230+ writer = WikiIndexModifier.openForWrite(((FSDirectory)directory).getFile().getAbsolutePath(),false,new SimpleAnalyzer());
229231 initWriter(writer);
230232 reader = null;
231233 searcher = null;
@@ -247,24 +249,29 @@
248250 Transaction trans = new Transaction(iid, IndexId.Transaction.INDEX);
249251 trans.begin();
250252 try{
251 - ensureRead();
252 - // batch delete
253 - for(IndexUpdateRecord rec : records){
254 - if(rec.doDelete()){
255 - Article a = rec.getArticle();
256 - if(a.getTitle()==null || a.getTitle().equals("")){
257 - // try to fetch ns:title so we can have nicer debug info
258 - String key = getKeyFromPageId(rec.getIndexKey());
259 - if(key != null)
260 - a.setNsTitleKey(key);
 253+ try{
 254+ ensureRead();
 255+ // batch delete
 256+ for(IndexUpdateRecord rec : records){
 257+ if(rec.doDelete()){
 258+ Article a = rec.getArticle();
 259+ if(a.getTitle()==null || a.getTitle().equals("")){
 260+ // try to fetch ns:title so we can have nicer debug info
 261+ String key = getKeyFromPageId(rec.getIndexKey());
 262+ if(key != null)
 263+ a.setNsTitleKey(key);
 264+ }
 265+ log.debug(iid+": Deleting "+a);
 266+ reader.deleteDocuments(new Term("article_pageid",rec.getIndexKey()));
261267 }
262 - log.debug(iid+": Deleting "+a);
263 - reader.deleteDocuments(new Term("article_pageid",rec.getIndexKey()));
264268 }
 269+ flush();
 270+ } catch(Exception e){
 271+ // report but continue
 272+ log.warn("Error opening links index "+ iid +": "+e.getMessage());
265273 }
266 - flush();
267274 // batch add
268 - writer = new IndexWriter(iid.getIndexPath(),new SimpleAnalyzer(),false);
 275+ writer = WikiIndexModifier.openForWrite(iid.getIndexPath(),false,new SimpleAnalyzer());
269276 initWriter(writer);
270277 for(IndexUpdateRecord rec : records){
271278 if(rec.doAdd()){