Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/PropertyImporter.java |
— | — | @@ -148,14 +148,10 @@ |
149 | 149 | @Override |
150 | 150 | public void finish() throws PersistenceException { |
151 | 151 | ConceptBasedStoreBuilder store = buildConcepts ? this.store : this.propertyStore; |
152 | | - boolean resolveIdsFirst = buildConcepts ? true : false; |
153 | 152 | |
154 | | - if (beginTask("PropertyImporter.finish", "finishImport")) { |
155 | | - store.preparePostProcessing(); |
156 | | - endTask("PropertyImporter.finish", "finishImport"); |
157 | | - } |
| 153 | + store.prepareMassProcessing(); //NOTE: always make sure the DB is ready for mass processing |
158 | 154 | |
159 | | - if (resolveIdsFirst && beginTask("PropertyImporter.finish", "finishIdReferences#1")) { |
| 155 | + if (beginTask("PropertyImporter.finish", "finishIdReferences#1")) { |
160 | 156 | store.finishIdReferences(); |
161 | 157 | endTask("PropertyImporter.finish", "finishIdReferences#1"); |
162 | 158 | } |
— | — | @@ -165,7 +161,7 @@ |
166 | 162 | endTask("PropertyImporter.finish", "finishAliases"); |
167 | 163 | } |
168 | 164 | |
169 | | - if (!resolveIdsFirst && beginTask("PropertyImporter.finish", "finishIdReferences#2")) { |
| 165 | + if (beginTask("PropertyImporter.finish", "finishIdReferences#2")) { //NOTE: resolve IDs again, some may only be known now, after resolving aliases. |
170 | 166 | store.finishIdReferences(); |
171 | 167 | endTask("PropertyImporter.finish", "finishIdReferences#2"); |
172 | 168 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java |
— | — | @@ -47,10 +47,7 @@ |
48 | 48 | |
49 | 49 | @Override |
50 | 50 | public void finish() throws PersistenceException { |
51 | | - if (beginTask("ConceptImporter.finish", "finishImport")) { |
52 | | - store.preparePostProcessing(); |
53 | | - endTask("ConceptImporter.finish", "finishImport"); |
54 | | - } |
| 51 | + store.prepareMassProcessing(); //NOTE: always make sure the DB is ready for mass processing |
55 | 52 | |
56 | 53 | if (beginTask("ConceptImporter.finish", "finishBadLinks")) { |
57 | 54 | store.finishBadLinks(); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java |
— | — | @@ -867,11 +867,6 @@ |
868 | 868 | return dataset; |
869 | 869 | } |
870 | 870 | |
871 | | - public void preparePostProcessing() throws PersistenceException { |
872 | | - log("* preparePostProcessing *"); |
873 | | - } |
874 | | - |
875 | | - |
876 | 871 | public void prepareMassInsert() throws PersistenceException { |
877 | 872 | log("* prepareMassInsert *"); |
878 | 873 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordStoreBuilder.java |
— | — | @@ -517,7 +517,7 @@ |
518 | 518 | } |
519 | 519 | } |
520 | 520 | |
521 | | - protected int resolveRedirects(RelationTable aliasTable, DatabaseTable table, String relNameField, String relIdField, AliasScope scope, int chunkFactor, String forceRIndex, String forceEIndex) throws PersistenceException { |
| 521 | + protected int resolveRedirects(RelationTable aliasTable, DatabaseTable table, String relNameField, String relIdField, AliasScope scope, int chunkFactor, String where, String forceRIndex, String forceEIndex) throws PersistenceException { |
522 | 522 | if (relIdField==null && relNameField==null) throw new IllegalArgumentException("relNameFields and relIdField can't both be null"); |
523 | 523 | |
524 | 524 | if (forceRIndex==null) { |
— | — | @@ -558,8 +558,13 @@ |
559 | 559 | if (nmField!=null && idField!=null) sql += ", "; |
560 | 560 | if (idField!=null) sql += " R."+relIdField+" = E.target"; |
561 | 561 | |
562 | | - String where = scope == null ? null : " scope = "+scope.ordinal(); |
| 562 | + String w = scope == null ? null : " scope = "+scope.ordinal(); |
563 | 563 | |
| 564 | + if (w!=null) { |
| 565 | + if (where == null) where = w; |
| 566 | + else where = "("+where+") AND ("+w+")"; |
| 567 | + } |
| 568 | + |
564 | 569 | return executeChunkedUpdate("resolveRedirects", table.getName()+"."+relNameField+"+"+relIdField, sql, where, aliasTable, "source", chunkFactor); |
565 | 570 | } |
566 | 571 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseIncrementalStoreBuilder.java |
— | — | @@ -55,16 +55,21 @@ |
56 | 56 | Runtime.getRuntime().gc(); //run garbage collection |
57 | 57 | } |
58 | 58 | |
59 | | - public void preparePostProcessing() throws PersistenceException { |
| 59 | + public void prepareMassInsert() throws PersistenceException { //NOTE: disables keys on the database; prepareMassProcessing() enables them again |
60 | 60 | try { |
61 | | - flush(); |
62 | | - if (beginTask("DatabaseLocalConceptStore.preparePostProcessing", "enableKeys")) { |
| 61 | + database.disableKeys(); |
| 62 | + } catch (SQLException e) { |
| 63 | + throw new PersistenceException(e); //NOTE: SQL failures are wrapped in a PersistenceException |
| 64 | + } |
| 65 | + } |
| 66 | + |
| 67 | + public void prepareMassProcessing() throws PersistenceException { //NOTE: takes over from the removed preparePostProcessing(): flush, then enable keys, without the beginTask/endTask guard |
| 68 | + try { |
| 69 | + flush(); //NOTE: runs before keys are enabled (presumably to write out buffered inserts first -- TODO confirm) |
63 | 70 | database.enableKeys(); |
64 | | - endTask("DatabaseLocalConceptStore.preparePostProcessing", "enableKeys"); |
65 | | - } |
66 | 71 | } catch (SQLException e) { |
67 | 72 | throw new PersistenceException(e); |
68 | | - } |
| 73 | + } |
69 | 74 | } |
70 | 75 | |
71 | 76 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java |
— | — | @@ -632,6 +632,7 @@ |
633 | 633 | } |
634 | 634 | |
635 | 635 | public void prepareMassProcessing() throws PersistenceException { |
| 636 | + this.flush(); |
636 | 637 | this.enableKeys(); |
637 | 638 | |
638 | 639 | if (propertyStore!=null) { |
— | — | @@ -674,21 +675,6 @@ |
675 | 676 | } |
676 | 677 | } |
677 | 678 | |
678 | | - public void preparePostProcessing() throws PersistenceException { |
679 | | - flush(); |
680 | | - enableKeys(); |
681 | | - |
682 | | - if (propertyStore!=null && beginTask("preparePostProcessing", "propertyStore.preparePostProcessing")) { |
683 | | - propertyStore.preparePostProcessing(); |
684 | | - endTask("preparePostProcessing", "propertyStore.preparePostProcessing"); |
685 | | - } |
686 | | - |
687 | | - if (textStore!=null && beginTask("preparePostProcessing", "textStore.preparePostProcessing")) { |
688 | | - textStore.preparePostProcessing(); |
689 | | - endTask("preparePostProcessing", "textStore.preparePostProcessing"); |
690 | | - } |
691 | | - } |
692 | | - |
693 | 679 | public void finishSections() throws PersistenceException { |
694 | 680 | if (beginTask("finishSections", "buildSectionConcepts")) { |
695 | 681 | int n = buildSectionConcepts(); |
— | — | @@ -841,7 +827,7 @@ |
842 | 828 | //NOTE: need to resolve category-aliases here, so no concepts are generated for aliased categories! |
843 | 829 | //NOTE: bad category redirs have been droped in finishBadLinks |
844 | 830 | if (beginTask("finishMissingConcpets", "resolveRedirects:broader")) { |
845 | | - int n = resolveRedirects(aliasTable, broaderTable, "broad_name", idManager==null ? null : "broad", AliasScope.CATEGORY, 1, idManager==null ? "broad_name" : "broad_narrow", null); |
| 831 | + int n = resolveRedirects(aliasTable, broaderTable, "broad_name", idManager==null ? null : "broad", AliasScope.CATEGORY, 1, null, idManager==null ? "broad_name" : "broad_narrow", null); |
846 | 832 | endTask("finishMissingConcpets", "resolveRedirects:broader", n+" entries"); |
847 | 833 | } |
848 | 834 | |
— | — | @@ -939,24 +925,24 @@ |
940 | 926 | if (beginTask("finishAliases", "resolveRedirects:link")) { |
941 | 927 | //XXX: SLOW! |
942 | 928 | //TODO: smaller chunks? chunk on target table, not alias table? force index? |
943 | | - int n = resolveRedirects(aliasTable, linkTable, "target_name", "target", AliasScope.REDIRECT, 8, "target_anchor", null); |
| 929 | + int n = resolveRedirects(aliasTable, linkTable, "target_name", "target", AliasScope.REDIRECT, 8, null, "target_anchor", null); |
944 | 930 | endTask("finishAliases", "resolveRedirects:link", n+" entries"); |
945 | 931 | } |
946 | 932 | |
947 | 933 | //NOTE: broader.broad_name already done in finishMissingConcepts for AliasScope.BROADER |
948 | 934 | |
949 | 935 | if (beginTask("finishAliases", "resolveRedirects:about")) { |
950 | | - int n = resolveRedirects(aliasTable, aboutTable, "concept_name", "concept", null, 1, null, null); |
| 936 | + int n = resolveRedirects(aliasTable, aboutTable, "concept_name", "concept", null, 1, null, null, null); |
951 | 937 | endTask("finishAliases", "resolveRedirects:about", n+" entries"); |
952 | 938 | } |
953 | 939 | |
954 | 940 | if (beginTask("finishAliases", "resolveRedirects:narrow")) { |
955 | | - int n = resolveRedirects(aliasTable, broaderTable, "narrow_name", "narrow", null, 1, null, null); |
| 941 | + int n = resolveRedirects(aliasTable, broaderTable, "narrow_name", "narrow", null, 1, null, null, null); |
956 | 942 | endTask("finishAliases", "resolveRedirects:narrow", n+" entries"); |
957 | 943 | } |
958 | 944 | |
959 | 945 | if (beginTask("finishAliases", "resolveRedirects:broad")) { |
960 | | - int n = resolveRedirects(aliasTable, broaderTable, "broad_name", "broad", null, 1, null, null); |
| 946 | + int n = resolveRedirects(aliasTable, broaderTable, "broad_name", "broad", null, 1, null, null, null); |
961 | 947 | endTask("finishAliases", "resolveRedirects:broad", n+" entries"); |
962 | 948 | } |
963 | 949 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/IncrementalStoreBuilder.java |
— | — | @@ -4,8 +4,6 @@ |
5 | 5 | |
6 | 6 | public interface IncrementalStoreBuilder extends WikiWordStoreBuilder { |
7 | 7 | |
8 | | - public void preparePostProcessing() throws PersistenceException; |
9 | | - |
10 | 8 | public void deleteDataAfter(int delAfter, boolean inclusive) throws PersistenceException; |
11 | 9 | |
12 | 10 | public void prepareMassProcessing() throws PersistenceException; |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabasePropertyStoreBuilder.java |
— | — | @@ -110,35 +110,66 @@ |
111 | 111 | return (Corpus)database.getDataset(); |
112 | 112 | } |
113 | 113 | |
114 | | - public void finishAliases() throws PersistenceException { |
115 | | - if (beginTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property")) { |
116 | | - RelationTable aliasTable = (RelationTable)conceptStoreSchema.getTable("alias"); |
117 | | - int n = resolveRedirects(aliasTable, propertyTable, "concept_name", idManager!=null ? "concept" : null, AliasScope.REDIRECT, 3, null, null); |
118 | | - endTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property", n+" entries"); |
| 114 | + protected boolean hasUnresolvedConceptReferences() throws PersistenceException{ //NOTE: true if at least one row of the property table has a NULL concept id |
| 115 | + try { |
| 116 | + Number c = (Number)database.executeSingleValueQuery("DatabasePropertyStoreBuilder.finishAliases#hasNull?", "select exists(select * from "+propertyTable.getSQLName()+" where concept is null)"); //NOTE: query the actual property table, not a hard-coded dataset-specific table name |
| 117 | + return c.intValue() > 0; //NOTE: EXISTS yields 0 or 1 |
| 118 | + } catch (SQLException e) { |
| 119 | + throw new PersistenceException(e); |
119 | 120 | } |
120 | 121 | } |
121 | | - |
122 | | - public void finishIdReferences() throws PersistenceException { |
123 | | - if (idManager==null && beginTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property")) { |
124 | | - int n = buildIdLinks(propertyTable, "concept_name", "concept", 1); |
125 | | - endTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property", n+" references"); |
126 | | - } |
127 | | - } |
128 | 122 | |
129 | | - public void prepareMassInsert() throws PersistenceException { |
| 123 | + protected boolean hasResolvedConceptReferences() throws PersistenceException { //NOTE: true if at least one row of the property table has a non-NULL concept id |
130 | 124 | try { |
131 | | - database.disableKeys(); |
| 125 | + Number c = (Number)database.executeSingleValueQuery("DatabasePropertyStoreBuilder.finishAliases#hasNotNull?", "select exists(select * from "+propertyTable.getSQLName()+" where concept is not null)"); //NOTE: query the actual property table, not a hard-coded dataset-specific table name; label distinguishes this from the NULL check |
| 126 | + return c.intValue() > 0; //NOTE: EXISTS yields 0 or 1 |
132 | 127 | } catch (SQLException e) { |
133 | 128 | throw new PersistenceException(e); |
134 | 129 | } |
135 | 130 | } |
136 | 131 | |
| 132 | + public void finishAliases() throws PersistenceException { //NOTE: resolves REDIRECT aliases in the property table in two passes: first with name+id fields, then name-only for rows without a concept id |
| 133 | + RelationTable aliasTable = (RelationTable)conceptStoreSchema.getTable("alias"); |
| 134 | + |
| 135 | + if (beginTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property#id")) { |
| 136 | + int n = 0; |
| 137 | + if (hasResolvedConceptReferences()) { //NOTE: pass 1 is skipped (n stays 0) when no row has a concept id yet |
| 138 | + n = resolveRedirects(aliasTable, propertyTable, "concept_name", "concept", AliasScope.REDIRECT, 3, null, null, null); |
| 139 | + } |
| 140 | + |
| 141 | + endTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property#id", n+" entries"); |
| 142 | + } |
| 143 | + |
| 144 | + if (beginTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property#name")) { |
| 145 | + int n = 0; |
| 146 | + if (hasUnresolvedConceptReferences()) { //NOTE: pass 2 only runs when some rows still have a NULL concept id |
| 147 | + n = resolveRedirects(aliasTable, propertyTable, "concept_name", null, AliasScope.REDIRECT, 3, propertyTable.getSQLName()+".concept is null", null, null); //NOTE: extra where-clause limits the update to rows with NULL concept id; relIdField is null, so presumably only concept_name is rewritten -- TODO confirm in resolveRedirects |
| 148 | + } |
| 149 | + |
| 150 | + endTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property#name", n+" entries"); |
| 151 | + } |
| 152 | + } |
| 153 | + |
| 154 | + public void finishIdReferences() throws PersistenceException { //NOTE: runs buildIdLinks on the property table (concept_name -> concept); no longer conditional on idManager==null |
| 155 | + if (beginTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property")) { |
| 156 | + int n = 0; |
| 157 | + if (hasUnresolvedConceptReferences()) { //NOTE: buildIdLinks is skipped (n stays 0) when no row has a NULL concept id |
| 158 | + n = buildIdLinks(propertyTable, "concept_name", "concept", 1); |
| 159 | + } |
| 160 | + |
| 161 | + endTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property", n+" references"); |
| 162 | + } |
| 163 | + } |
| 164 | + |
137 | 165 | public void prepareMassProcessing() throws PersistenceException { |
138 | 166 | try { |
139 | | - database.enableKeys(); |
| 167 | + this.conceptStoreSchema.enableKeys(); //NOTE: enables keys on the concept store schema (was: database.enableKeys()) |
140 | 168 | } catch (SQLException e) { |
141 | 169 | throw new PersistenceException(e); |
142 | 170 | } |
| 171 | + |
| 172 | + super.prepareMassProcessing(); //NOTE: then runs the inherited preparation (presumably the flush() + database.enableKeys() of DatabaseIncrementalStoreBuilder -- TODO confirm superclass) |
143 | 173 | } |
144 | 174 | |
| 175 | + |
145 | 176 | } |
\ No newline at end of file |