Index: branches/lucene-search-2.1/src/org/wikimedia/lsearch/oai/OAIHarvester.java |
— | — | @@ -28,6 +28,8 @@ |
29 | 29 | protected IndexId iid; |
30 | 30 | protected String resumptionToken, responseDate; |
31 | 31 | protected String host; |
| 32 | + /** maximum number of read attempts before giving up, useful when there are broken servers in the cluster */ |
| 33 | + protected int retries = 5; |
32 | 34 | |
33 | 35 | public OAIHarvester(IndexId iid, String url, Authenticator auth) throws MalformedURLException{ |
34 | 36 | this.urlbase = url; |
— | — | @@ -59,13 +61,24 @@ |
60 | 62 | |
61 | 63 | protected void read(URL url) throws IOException { |
62 | 64 | log.info("Reading records from "+url); |
63 | | - collector = new IndexUpdatesCollector(iid); |
64 | | - InputStream in = new BufferedInputStream(url.openStream()); |
65 | | - parser = new OAIParser(in,collector); |
66 | | - parser.parse(); |
67 | | - resumptionToken = parser.getResumptionToken(); |
68 | | - responseDate = parser.getResponseDate(); |
69 | | - in.close(); |
| 65 | + // try reading from url a number of times before giving up |
| 66 | + for(int tryNum = 1; tryNum <= this.retries; tryNum++){ |
| 67 | + try{ |
| 68 | + collector = new IndexUpdatesCollector(iid); |
| 69 | + InputStream in = new BufferedInputStream(url.openStream()); |
| 70 | + parser = new OAIParser(in,collector); |
| 71 | + parser.parse(); |
| 72 | + resumptionToken = parser.getResumptionToken(); |
| 73 | + responseDate = parser.getResponseDate(); |
| 74 | + in.close(); |
| 75 | + break; |
| 76 | + } catch(IOException e){ |
| 77 | + if(tryNum == this.retries) |
| 78 | + throw e; |
| 79 | + else |
| 80 | + log.warn("Error reading from url (will retry): "+url); |
| 81 | + } |
| 82 | + } |
70 | 83 | } |
71 | 84 | |
72 | 85 | /** Invoke ListRecords using the last resumption token, get atLeast num of records */ |
Index: branches/lucene-search-2.1/build |
— | — | @@ -9,10 +9,15 @@ |
10 | 10 | [ -e $dumps ] || mkdir $dumps |
11 | 11 | dumpfile="$dumps/dump-$dbname.xml" |
12 | 12 | timestamp=`date -u +%Y-%m-%d` |
13 | | - slave=`php $mediawiki/maintenance/getSlaveServer.php $dbname` |
| 13 | + slave=`php $mediawiki/maintenance/getSlaveServer.php \ |
| 14 | + $dbname \ |
| 15 | + --conf $mediawiki/LocalSettings.php \ |
| 16 | + --aconf $mediawiki/AdminSettings.php` |
14 | 17 | echo "Dumping $dbname..." |
15 | 18 | cd $mediawiki && php maintenance/dumpBackup.php \ |
16 | 19 | $dbname \ |
| 20 | + --conf $mediawiki/LocalSettings.php \ |
| 21 | + --aconf $mediawiki/AdminSettings.php \ |
17 | 22 | --current \ |
18 | 23 | --server=$slave > $dumpfile |
19 | 24 | [ -e $indexes/status ] || mkdir -p $indexes/status |