Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseWikiWordStore.java |
— | — | @@ -273,7 +273,6 @@ |
274 | 274 | } |
275 | 275 | } |
276 | 276 | |
277 | | - @Deprecated |
278 | 277 | protected void log(String msg) { |
279 | 278 | database.info(msg); |
280 | 279 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/GlobalConceptStoreSchema.java |
— | — | @@ -206,7 +206,7 @@ |
207 | 207 | String[] ll = listPrefixes("resource"); |
208 | 208 | if (ll.length>32) throw new IllegalArgumentException("only up to 32 languages are supported! found "+ll.length+" prefixes: "+Arrays.toString(ll)); |
209 | 209 | |
210 | | - Arrays.sort(ll); |
| 210 | + Arrays.sort(ll); //FIXME: sort by size! |
211 | 211 | Corpus[] cc = new Corpus[ll.length]; |
212 | 212 | |
213 | 213 | int i = 0; |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseGlobalConceptStoreBuilder.java |
— | — | @@ -16,6 +16,7 @@ |
17 | 17 | import javax.sql.DataSource; |
18 | 18 | |
19 | 19 | import de.brightbyte.application.Agenda; |
| 20 | +import de.brightbyte.application.Agenda.Record; |
20 | 21 | import de.brightbyte.data.ChunkyBitSet; |
21 | 22 | import de.brightbyte.db.DatabaseAccess; |
22 | 23 | import de.brightbyte.db.DatabaseField; |
— | — | @@ -159,6 +160,20 @@ |
160 | 161 | return ((id / idOffsetGranularity) +1) * idOffsetGranularity; |
161 | 162 | } |
162 | 163 | |
| 164 | + protected int deletePendingConceptsAfter(int id) throws PersistenceException { |
| 165 | + try { |
| 166 | + String sql = "DELETE FROM "+conceptTable.getSQLName()+" WHERE id > "+id; |
| 167 | + Integer n = (Integer)database.executeUpdate("deleteConceptsAfter", sql); |
| 168 | + |
| 169 | + sql = "DELETE FROM "+mergeTable.getSQLName()+" WHERE new > "+id; |
| 170 | + database.executeUpdate("deleteConceptsAfter", sql); |
| 171 | + |
| 172 | + return n; |
| 173 | + } catch (SQLException e) { |
| 174 | + throw new PersistenceException(e); |
| 175 | + } |
| 176 | + } |
| 177 | + |
163 | 178 | public int getMaxConceptId() throws PersistenceException { |
164 | 179 | try { |
165 | 180 | flush(); |
— | — | @@ -401,7 +416,7 @@ |
402 | 417 | " SELECT LL.concept, " + |
403 | 418 | " R.global_concept, " + |
404 | 419 | " 1 " + |
405 | | - " FROM "+langlinkTable.getSQLName()+" as LL " + |
| 420 | + " FROM "+langlinkTable.getSQLName()+" as LL force index(concept_language_target) " + |
406 | 421 | " JOIN "+originTable.getSQLName()+" as R force index(lang_name) " + |
407 | 422 | " ON R.lang = LL.language AND R.local_concept_name = LL.target"; |
408 | 423 | |
— | — | @@ -464,11 +479,39 @@ |
465 | 480 | return name; |
466 | 481 | } |
467 | 482 | |
| 483 | + public void cleanupDirtyStep(Record rec) throws PersistenceException { |
| 484 | + if (rec.parameters.get("lastConceptId_")!=null) { |
| 485 | + int lastId = (Integer)rec.parameters.get("lastConceptId_"); |
| 486 | + log(context+"::"+name+"#FindMergeCandidatesQuery continues in dirty step, deleting pending concepts from id > "+lastId); |
| 487 | + deletePendingConceptsAfter(lastId); |
| 488 | + conceptId = lastId +1; |
| 489 | + } else { |
| 490 | + int lastId = getMaxConceptId(); |
| 491 | + warning(0, context+"::"+name+"#FindMergeCandidatesQuery continues in dirty step, no lastConceptId_", "getMaxConceptId() = "+lastId, null); |
| 492 | + conceptId = lastId +1; |
| 493 | + } |
| 494 | + |
| 495 | + String sql = "SELECT old, new FROM " + mergeTable.getSQLName(); |
| 496 | + ResultSet res = DatabaseGlobalConceptStoreBuilder.this.executeQuery(context+"::"+name+"#cleanupDirtyStep.stop", sql); |
| 497 | + |
| 498 | + try { |
| 499 | + while (res.next()) { //TODO: progress? safepoint? |
| 500 | + int leftId = res.getInt("old"); |
| 501 | + int rightId = res.getInt("new"); |
| 502 | + stop.set(leftId, true); |
| 503 | + stop.set(rightId, true); |
| 504 | + } |
| 505 | + res.close(); |
| 506 | + } catch (SQLException e) { |
| 507 | + throw new PersistenceException(e); |
| 508 | + } |
| 509 | + } |
| 510 | + |
| 511 | + public String getQueryState() { |
| 512 | + return "lastConceptId_=I"+conceptId; |
| 513 | + } |
| 514 | + |
468 | 515 | public int executeUpdate(int chunk, long first, long end) throws PersistenceException { |
469 | | - //FIXME: for a clean continuation, we weould need to delete merged concepts |
470 | | - // created during the last inclomplete chunk, somilar to the safepoint recovery |
471 | | - // in the import loop! |
472 | | - |
473 | 516 | String sql = "SELECT " + |
474 | 517 | " L.id as concept1_id, " + |
475 | 518 | " L.name as concept1_name, " + |
— | — | @@ -482,10 +525,10 @@ |
483 | 526 | " R.language_bits as concept2_language_bits, " + |
484 | 527 | " R.language_count as concept2_language_count, " + |
485 | 528 | " R.random as concept2_random " + |
486 | | - " FROM "+relationTable.getSQLName()+" as J1 "+ |
487 | | - " JOIN "+relationTable.getSQLName()+" as J2 ON J2.concept1 = J1.concept2 "+ |
488 | | - " JOIN "+conceptTable.getSQLName()+" as L ON L.id = J1.concept1 " + |
489 | | - " JOIN "+conceptTable.getSQLName()+" as R ON R.id = J2.concept1 " + |
| 529 | + " FROM "+relationTable.getSQLName()+" as J1 force index(PRIMARY) "+ |
| 530 | + " JOIN "+relationTable.getSQLName()+" as J2 force index(PRIMARY) ON J2.concept1 = J1.concept2 "+ |
| 531 | + " JOIN "+conceptTable.getSQLName()+" as L force index(PRIMARY) ON L.id = J1.concept1 " + |
| 532 | + " JOIN "+conceptTable.getSQLName()+" as R force index(PRIMARY) ON R.id = J2.concept1 " + |
490 | 533 | " WHERE (L.language_bits & R.language_bits) = 0 " + |
491 | 534 | " AND (J1.concept1 >= "+first+" AND J1.concept1 < "+end+")" + |
492 | 535 | " AND (J1.langref >= 1 AND J2.langref >= 1) " + |
— | — | @@ -634,6 +677,8 @@ |
635 | 678 | Agenda.Record rec = getAgenda().getCurrentRecord(); |
636 | 679 | ofs = (Integer)rec.parameters.get("offset_"); |
637 | 680 | |
| 681 | + //NOTE: don't call deletePendingConceptsAfter(ofs) here, do cleanup per chunk! |
| 682 | + |
638 | 683 | int c = findMergeCandidates(ofs); |
639 | 684 | flush(); |
640 | 685 | |