r50230 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r50229‎ | r50230 | r50231 >
Date:18:48, 5 May 2009
Author:daniel
Status:deferred
Tags:
Comment:
Made PropertyExtraction independent of concept creation
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/PropertyImporter.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptBasedStoreBuilder.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabasePropertyStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseTextStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/IncrementalStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/PropertyStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/TextStoreBuilder.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/PropertyImporter.java
@@ -12,10 +12,13 @@
1313 import de.brightbyte.wikiword.TweakSet;
1414 import de.brightbyte.wikiword.analyzer.WikiTextAnalyzer;
1515 import de.brightbyte.wikiword.analyzer.WikiPage;
 16+import de.brightbyte.wikiword.store.builder.ConceptBasedStoreBuilder;
1617 import de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder;
1718
1819 public class PropertyImporter extends ConceptImporter {
1920
 21+ boolean buildConcepts = true;
 22+
2023 public PropertyImporter(WikiTextAnalyzer analyzer, LocalConceptStoreBuilder store, TweakSet tweaks) throws PersistenceException {
2124 super(analyzer, store, tweaks);
2225 }
@@ -44,24 +47,37 @@
4548 String name = analyzerPage.getConceptName();
4649 String rcName = analyzerPage.getResourceName();
4750
48 - int rcId = storeResource(rcName, analyzerPage.getResourceType(), timestamp);
 51+ int rcId = 0;
 52+ int cid = 0;
4953
50 - ConceptType ctype = analyzerPage.getConceptType();
51 - int cid = storeConcept(rcId, name, ctype);
 54+ ResourceType rcType = analyzerPage.getResourceType();
5255
53 - //storeProperty(rcId, cid, name, "__TYPE__", analyzerPage.getConceptType().getName()); //FIXME: remove me!
 56+ if (buildConcepts) {
 57+ rcId = storeResource(rcName, rcType, timestamp);
 58+
 59+ if (rcType == ResourceType.REDIRECT) {
 60+ storeAlias(analyzerPage, rcId);
 61+ }
 62+
 63+ ConceptType ctype = analyzerPage.getConceptType();
 64+ cid = storeConcept(rcId, name, ctype);
 65+ }
5466
55 - MultiMap<String, CharSequence, Set<CharSequence>> properties = analyzerPage.getProperties();
56 - for (Map.Entry<String, Set<CharSequence>> e: properties.entrySet()) {
57 - String property = e.getKey();
 67+ if (rcType == ResourceType.ARTICLE || rcType == ResourceType.SUPPLEMENT) {
 68+ MultiMap<String, CharSequence, Set<CharSequence>> properties = analyzerPage.getProperties();
 69+ for (Map.Entry<String, Set<CharSequence>> e: properties.entrySet()) {
 70+ String property = e.getKey();
 71+
 72+ for (CharSequence v: e.getValue()) {
 73+ storeProperty(rcId, cid, name, property, v.toString());
 74+ }
 75+ }
5876
59 - for (CharSequence v: e.getValue()) {
60 - storeProperty(rcId, cid, name, property, v.toString());
 77+ if (buildConcepts) {
 78+ storeSupplements(rcId, cid, analyzerPage);
6179 }
6280 }
6381
64 - storeSupplements(rcId, cid, analyzerPage);
65 -
6682 return cid;
6783 }
6884
@@ -71,12 +87,18 @@
7288
7389 if (t!=ResourceType.ARTICLE
7490 && t!=ResourceType.CATEGORY
75 - && t!=ResourceType.SUPPLEMENT) return false;
 91+ && t!=ResourceType.SUPPLEMENT) {
 92+ return false;
 93+ }
7694
7795 if (t==ResourceType.SUPPLEMENT) {
7896 return true;
7997 }
8098
 99+ if (t==ResourceType.REDIRECT) {
 100+ return buildConcepts;
 101+ }
 102+
81103 if ( analyzerPage.getProperties().isEmpty()
82104 && analyzerPage.getSupplementedConcept()==null
83105 && analyzerPage.getSupplementLinks().isEmpty() ) {
@@ -90,11 +112,44 @@
91113
92114 public static void declareOptions(Arguments args) {
93115 AbstractImporter.declareOptions(args);
 116+
 117+ args.declare("attach", null, false, Boolean.class, "attach properties to existing thesaurus");
94118 }
95119
96120 @Override
97121 public void configure(Arguments args) throws Exception {
98122 super.configure(args);
 123+
 124+ if (args.isSet("attach")) buildConcepts = false;
99125 }
100126
 127+ protected boolean getPurgeData() {
 128+ return buildConcepts;
 129+ }
 130+
 131+ @Override
 132+ public void finish() throws PersistenceException {
 133+ ConceptBasedStoreBuilder store = buildConcepts ? this.store : this.propertyStore;
 134+ boolean resolveIdsFirst = buildConcepts ? true : false;
 135+
 136+ if (beginTask("PropertyImporter.finish", "finishImport")) {
 137+ store.finalizeImport();
 138+ endTask("PropertyImporter.finish", "finishImport");
 139+ }
 140+
 141+ if (resolveIdsFirst && beginTask("PropertyImporter.finish", "finishIdReferences#1")) {
 142+ store.finishIdReferences();
 143+ endTask("PropertyImporter.finish", "finishIdReferences#1");
 144+ }
 145+
 146+ if (beginTask("PropertyImporter.finish", "finishAliases")) {
 147+ store.finishAliases();
 148+ endTask("PropertyImporter.finish", "finishAliases");
 149+ }
 150+
 151+ if (!resolveIdsFirst && beginTask("PropertyImporter.finish", "finishIdReferences#2")) {
 152+ store.finishIdReferences();
 153+ endTask("PropertyImporter.finish", "finishIdReferences#2");
 154+ }
 155+ }
101156 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ImportApp.java
@@ -273,7 +273,7 @@
274274
275275 if (operation == Operation.FRESH) {
276276 section("-- purge --------------------------------------------------");
277 - initializeStores(true, getDropWarnings()); //FIXME: don't purge warning always... but when?!
 277+ initializeStores(getPurgeData(), getDropWarnings());
278278 }
279279 else {
280280 initializeStores(false, getDropWarnings());
@@ -314,6 +314,10 @@
315315 return false;
316316 }
317317
 318+ protected boolean getPurgeData() {
 319+ return true;
 320+ }
 321+
318322 public int getExitCode() {
319323 return exitCode;
320324 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ConceptImporter.java
@@ -60,16 +60,6 @@
6161 endTask("ConceptImporter.finish", "finishImport");
6262 }
6363
64 - if (storeProperties && beginTask("ConceptImporter.finish", "finishImport")) {
65 - propertyStore.finalizeImport();
66 - endTask("ConceptImporter.finish", "finishImport");
67 - }
68 -
69 - if (storeFlatText && beginTask("ConceptImporter.finish", "finishImport")) {
70 - textStore.finalizeImport();
71 - endTask("ConceptImporter.finish", "finishImport");
72 - }
73 -
7464 if (beginTask("ConceptImporter.finish", "finishBadLinks")) {
7565 store.finishBadLinks();
7666 endTask("ConceptImporter.finish", "finishBadLinks");
@@ -98,11 +88,6 @@
9989 endTask("ConceptImporter.finish", "finishAliases");
10090 }
10191
102 - if (propertyStore!=null && beginTask("ConceptImporter.finish", "propertyStore#finishAliases")) {
103 - propertyStore.finishAliases();
104 - endTask("ConceptImporter.finish", "propertyStore#finishAliases");
105 - }
106 -
10792 //TODO: finish aliases for textStore!
10893
10994 if (beginTask("ConceptImporter.finish", "finishRelations")) {
@@ -243,7 +228,7 @@
244229
245230 @Override
246231 public int importPage(WikiPage analyzerPage, Date timestamp) throws PersistenceException {
247 - ResourceType ptype = analyzerPage.getResourceType();
 232+ ResourceType rcType = analyzerPage.getResourceType();
248233 String name = analyzerPage.getConceptName();
249234 String rcName = analyzerPage.getResourceName();
250235 String text = analyzerPage.getText().toString();
@@ -251,18 +236,18 @@
252237 //String title = analyzerPage.getTitle().toString();
253238
254239 //TODO: check if page is stored. if up to date, skip. if older, update. if missing, create. optionally force update.
255 - int rcId = storeResource(rcName, ptype, timestamp);
 240+ int rcId = storeResource(rcName, rcType, timestamp);
256241
257242 if (storeFlatText) {
258 - textStore.storeRawText(rcId, rcName, ptype, text);
 243+ textStore.storeRawText(rcId, rcName, text);
259244 }
260245
261246 if (storeFlatText) {
262247 CharSequence plain = analyzerPage.getPlainText(false);
263 - textStore.storePlainText(rcId, rcName, ptype, plain.toString());
 248+ textStore.storePlainText(rcId, rcName, plain.toString());
264249 }
265250
266 - if (ptype == ResourceType.CATEGORY) {
 251+ if (rcType == ResourceType.CATEGORY) {
267252 List<WikiTextAnalyzer.WikiLink> links = analyzerPage.getLinks();
268253 linkTracker.step(links.size());
269254
@@ -290,7 +275,7 @@
291276 // need resolve-ids on langling, then!
292277 // beware aliased categories!
293278 }
294 - else if (ptype == ResourceType.ARTICLE || ptype == ResourceType.SUPPLEMENT) {
 279+ else if (rcType == ResourceType.ARTICLE || rcType == ResourceType.SUPPLEMENT) {
295280 conceptTracker.step();
296281
297282 //TODO: handle "other meanings" header (mini-disambig!)
@@ -392,7 +377,7 @@
393378
394379 //FIXME: store supplement links
395380 }
396 - else if (ptype == ResourceType.DISAMBIG) {
 381+ else if (rcType == ResourceType.DISAMBIG) {
397382 //storeConcept(rcId, name, ConceptType.NONE);
398383
399384 Set<CharSequence> terms = analyzerPage.getTitleTerms();
@@ -416,7 +401,7 @@
417402 }
418403 }
419404 }
420 - else if (ptype == ResourceType.LIST) {
 405+ else if (rcType == ResourceType.LIST) {
421406 //storeConcept(rcId, name, ConceptType.NONE);
422407
423408 //FIXME: extract fewer links... use disambig-logic?
@@ -428,37 +413,49 @@
429414 //TODO: extract concept name from "List of..." ?
430415 //FIXME: category-like interpretation!
431416 }
432 - else if (ptype == ResourceType.REDIRECT) {
433 - WikiTextAnalyzer.WikiLink link = analyzerPage.getRedirect();
434 -
435 - if (link==null) {
436 - warn(rcId, "bad redirect (no link)", "Text: "+StringUtils.clipString(text, 256, "..."), null);
437 - }
438 - else if (link.getInterwiki()!=null || link.getNamespace()!=0) {
439 - //redirects to other wikis or into another namespace are handeled as BAD page.
440 - out.info("skipped bad redirect "+rcName+" -> "+link);
441 - }
442 - else if (name.equals(link.getPage().toString())) {
443 - warn(rcId, "bad redirect (self-link)", "page "+name, null);
444 - }
445 - else {
446 - int conceptId = storeConcept(rcId, name, ConceptType.ALIAS);
447 - storePageTerms(rcId, analyzerPage.getTitleTerms(), -1, link.getPage().toString(), ExtractionRule.TERM_FROM_REDIRECT );
448 - storeConceptAlias(rcId, conceptId, name, -1, link.getPage().toString(), AliasScope.REDIRECT); //TODO: confidence?...
449 -
450 - //FIXME: redir to section!
451 - }
 417+ else if (rcType == ResourceType.REDIRECT) {
 418+ storeAlias(analyzerPage, rcId);
452419 }
453 - else if (ptype == ResourceType.BAD) {
 420+ else if (rcType == ResourceType.BAD) {
454421 out.info("skipped BAD page "+rcName);
455422 }
456423 else {
457 - out.warn("skipped page "+rcName+" ["+ptype+"]");
 424+ out.warn("skipped page "+rcName+" ["+rcType+"]");
458425 }
459426
460427 return rcId;
461428 }
462429
 430+ protected int storeAlias(WikiPage analyzerPage, int rcId) throws PersistenceException {
 431+ String name = analyzerPage.getConceptName();
 432+ String rcName = analyzerPage.getResourceName();
 433+ String text = analyzerPage.getText().toString();
 434+
 435+ WikiTextAnalyzer.WikiLink link = analyzerPage.getRedirect();
 436+
 437+ int conceptId = 0;
 438+
 439+ if (link==null) {
 440+ warn(rcId, "bad redirect (no link)", "Text: "+StringUtils.clipString(text, 256, "..."), null);
 441+ }
 442+ else if (link.getInterwiki()!=null || link.getNamespace()!=0) {
 443+ //redirects to other wikis or into another namespace are handeled as BAD page.
 444+ out.info("skipped bad redirect "+rcName+" -> "+link);
 445+ }
 446+ else if (name.equals(link.getPage().toString())) {
 447+ warn(rcId, "bad redirect (self-link)", "page "+name, null);
 448+ }
 449+ else {
 450+ conceptId = storeConcept(rcId, name, ConceptType.ALIAS);
 451+ storePageTerms(rcId, analyzerPage.getTitleTerms(), -1, link.getPage().toString(), ExtractionRule.TERM_FROM_REDIRECT );
 452+ storeConceptAlias(rcId, conceptId, name, -1, link.getPage().toString(), AliasScope.REDIRECT); //TODO: confidence?...
 453+
 454+ //FIXME: redir to section!
 455+ }
 456+
 457+ return conceptId;
 458+ }
 459+
463460 public static void declareOptions(Arguments args) {
464461 AbstractImporter.declareOptions(args);
465462
@@ -551,7 +548,6 @@
552549 for (CharSequence supp: supplementLinks) {
553550 storeConceptAlias(rcId, -1, supp.toString(), cid, name, AliasScope.SUPPLEMENT);
554551 }
555 -
556552 }
557553
558554 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java
@@ -32,14 +32,22 @@
3333
3434 public class DebugTextStoreBuilder implements TextStoreBuilder {
3535
36 - public void storePlainText(int textId, String name, ResourceType ptype, String text) throws PersistenceException {
37 - log("* storePlainText("+textId+", "+name+", "+ptype+", "+ptype+") *");
 36+ public void storePlainText(int rcId, String name, String text) throws PersistenceException {
 37+ log("* storePlainText("+rcId+", "+name+") *");
3838 }
3939
40 - public void storeRawText(int textId, String name, ResourceType ptype, String text) throws PersistenceException {
41 - log("* storeRawText("+textId+", "+name+", "+ptype+", "+ptype+") *");
 40+ public void storeRawText(int rcId, String name, String text) throws PersistenceException {
 41+ log("* storeRawText("+rcId+", "+name+") *");
4242 }
4343
 44+ public void finishAliases() throws PersistenceException {
 45+ log("* finishAliases *");
 46+ }
 47+
 48+ public void finishIdReferences() throws PersistenceException {
 49+ log("* finishIdReferences *");
 50+ }
 51+
4452 public void checkConsistency() throws PersistenceException {
4553 log("* checkConsistency *");
4654 }
@@ -125,11 +133,14 @@
126134 log("* finishAliases *");
127135 }
128136
129 -
130137 public void finalizeImport() throws PersistenceException {
131138 log("* finalizeImport *");
132139 }
133140
 141+ public void finishIdReferences() throws PersistenceException {
 142+ log("* finishIdReferences *");
 143+ }
 144+
134145 public void prepareImport() throws PersistenceException {
135146 log("* prepareImport *");
136147 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/TextStoreBuilder.java
@@ -1,13 +1,15 @@
22 package de.brightbyte.wikiword.store.builder;
33
44 import de.brightbyte.util.PersistenceException;
5 -import de.brightbyte.wikiword.ResourceType;
65
7 -public interface TextStoreBuilder extends WikiWordStoreBuilder {
8 - public abstract void storeRawText(int textId, String name, ResourceType ptype, String text)
 6+public interface TextStoreBuilder extends WikiWordStoreBuilder, IncrementalStoreBuilder {
 7+ public abstract void storeRawText(int rcId, String rcName, String text)
98 throws PersistenceException;
109
11 - public abstract void storePlainText(int textId, String name, ResourceType ptype, String text)
 10+ public abstract void storePlainText(int rcId, String rcName, String text)
1211 throws PersistenceException;
 12+
 13+ //public abstract void finishAliases() throws PersistenceException;
 14+ //public abstract void finishIdReferences() throws PersistenceException;
1315
1416 }
\ No newline at end of file
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/PropertyStoreBuilder.java
@@ -3,21 +3,12 @@
44 import de.brightbyte.util.PersistenceException;
55 import de.brightbyte.wikiword.store.WikiWordLocalStore;
66
7 -public interface PropertyStoreBuilder extends WikiWordStoreBuilder, WikiWordLocalStore {
 7+public interface PropertyStoreBuilder extends WikiWordStoreBuilder, WikiWordLocalStore, ConceptBasedStoreBuilder {
 8+
89 public abstract void storeProperty(int resourceId, int conceptId, String concept, String property, String value)
910 throws PersistenceException;
1011
11 - /*
12 - public abstract int storeConcept(int rcId, String name, ConceptType ctype)
13 - throws PersistenceException;
14 -
15 - public abstract int storeResource(String name, ResourceType ptype, Date time)
16 - throws PersistenceException;
17 -
18 - public abstract void storeConceptAlias(int rcId, int source, String sourceName, int target, String targetName, AliasScope scope)
19 - throws PersistenceException;
20 - */
21 -
2212 public abstract void finishAliases() throws PersistenceException;
 13+ public abstract void finishIdReferences() throws PersistenceException;
2314
2415 }
\ No newline at end of file
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordStoreBuilder.java
@@ -492,7 +492,35 @@
493493
494494 return executeChunkedUpdate("resolveRedirects", table.getName()+"."+relNameField+"+"+relIdField, sql, where, aliasTable, "source", chunkFactor);
495495 }
 496+
 497+
 498+ /**
 499+ * Builds id-references from name-references
 500+ */
 501+ protected int buildIdLinks(DatabaseTable table, String relNameField, String relIdField, int chunkFactor) throws PersistenceException {
 502+ DatabaseField nmField = table.getField(relNameField);
 503+ DatabaseField idField = table.getField(relIdField);
 504+
 505+ if (!(nmField instanceof ReferenceField)) throw new IllegalArgumentException(relNameField+" is not a reference field in table "+table.getName());
 506+ if (!(idField instanceof ReferenceField)) throw new IllegalArgumentException(relIdField+" is not a reference field in table "+table.getName());
 507+
 508+ String nmTable = ((ReferenceField)nmField).getTargetTable();
 509+ String idTable = ((ReferenceField)idField).getTargetTable();
 510+
 511+ if (!nmTable.equals(idTable)) throw new IllegalArgumentException(relNameField+" and "+relIdField+" in table "+table.getName()+" do not reference the same table: "+nmTable+" != "+idTable);
 512+ DatabaseTable target = getTable(nmTable);
496513
 514+ String targetNameField = ((ReferenceField)nmField).getTargetField();
 515+ String targetIdField = ((ReferenceField)idField).getTargetField();
 516+
 517+ String sql = "UPDATE "+table.getSQLName()+" as R JOIN "+target.getSQLName()+" as E "
 518+ + " ON R."+relNameField+" = E."+targetNameField+" "
 519+ + " SET R."+relIdField+" = E."+targetIdField+" ";
 520+ String where = " R."+relIdField+" IS NULL";
 521+
 522+ return executeChunkedUpdate("buildIdLinks", table.getName()+"."+relNameField, sql, where, target, targetIdField, chunkFactor);
 523+ }
 524+
497525 public void finalizeImport() throws PersistenceException {
498526 flush();
499527 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ConceptBasedStoreBuilder.java
@@ -0,0 +1,10 @@
 2+package de.brightbyte.wikiword.store.builder;
 3+
 4+import de.brightbyte.util.PersistenceException;
 5+
 6+public interface ConceptBasedStoreBuilder extends WikiWordStoreBuilder {
 7+
 8+ public void finishIdReferences() throws PersistenceException;
 9+ public void finishAliases() throws PersistenceException;
 10+
 11+}
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java
@@ -12,12 +12,11 @@
1313
1414 import javax.sql.DataSource;
1515
16 -import org.ardverk.collection.PatriciaTrie;
17 -import org.ardverk.collection.StringKeyAnalyzer;
18 -
1916 import de.brightbyte.application.Agenda;
 17+import de.brightbyte.data.Pair;
2018 import de.brightbyte.data.PersistentIdManager;
2119 import de.brightbyte.data.cursor.CursorProcessor;
 20+import de.brightbyte.data.cursor.DataCursor;
2221 import de.brightbyte.data.cursor.DataSet;
2322 import de.brightbyte.db.DatabaseAccess;
2423 import de.brightbyte.db.DatabaseDataSet;
@@ -172,18 +171,39 @@
173172 }
174173 else {
175174 //FIXME: should fail if we are continuing a previous import, but the file doesn't exist.
176 - //FIXME: should failon partial load
 175+ //FIXME: should fail on partial load
177176 //XXX: could probably be skipped if continuing at a stage after dump reading
178177 if (idManager!=null) {
179 - log("loading persisted ID map..."+" memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
180 - idManager.load();
181 - log("Max persisted ID: "+idManager.getMaxId()+"; memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
 178+ if (idManager.fileExists()) {
 179+ log("loading persisted ID map..."+" memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
 180+ idManager.load();
 181+ log("Max persisted ID: "+idManager.getMaxId()+"; memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
 182+ } else {
 183+ log("building persisted ID map..."+" memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
 184+ DataCursor<Pair<String, Integer>> cursor = getConceptIdCursor();
 185+ idManager.slurp(cursor);
 186+ cursor.close();
 187+ log("Max persisted ID: "+idManager.getMaxId()+"; memory used: "+(Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1024+"KB");
 188+ }
182189 }
183190 }
184191
185192 super.initialize(purge, dropAll);
186193 }
187194
 195+ protected DataCursor<Pair<String, Integer>> getConceptIdCursor() throws PersistenceException {
 196+ String sql = "SELECT name, id from " + conceptTable.getSQLName();
 197+ ResultSet rs = executeQuery("getConceptIdCursor", sql);
 198+
 199+ DatabaseDataSet.Factory<Pair<String, Integer>> f = new DatabaseDataSet.Factory<Pair<String, Integer>>() {
 200+ public Pair<String, Integer> newInstance(ResultSet row) throws Exception {
 201+ return new Pair<String, Integer>(row.getString(1), row.getInt(2));
 202+ }
 203+ };
 204+
 205+ return new DatabaseDataSet.Cursor<Pair<String, Integer>>(rs, f);
 206+ }
 207+
188208 public ConceptType getConceptType(int type) {
189209 return corpus.getConceptTypes().getType(type);
190210 }
@@ -258,7 +278,6 @@
259279 throw new PersistenceException(e);
260280 }
261281 }
262 -
263282
264283 /**
265284 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeResourceAbout(java.lang.String, de.brightbyte.wikiword.ResourceType, java.util.Date, int conceptId, String conceptName)
@@ -525,6 +544,10 @@
526545 }
527546
528547 public void finalizeImport() throws PersistenceException {
 548+ if (idManager!=null) { //delete temporary ID file
 549+ idManager.deleteFile();
 550+ }
 551+
529552 try {
530553 flush();
531554 if (beginTask("DatabaseLocalConceptStore.finishImport", "enableKeys")) {
@@ -534,6 +557,16 @@
535558 } catch (SQLException e) {
536559 throw new PersistenceException(e);
537560 }
 561+
 562+ if (propertyStore!=null && beginTask("finishAliases", "propertyStore.finalizeImport")) {
 563+ propertyStore.finalizeImport();
 564+ endTask("finishAliases", "propertyStore.finalizeImport");
 565+ }
 566+
 567+ if (textStore!=null && beginTask("finishAliases", "textStore.finalizeImport")) {
 568+ textStore.finalizeImport();
 569+ endTask("finishAliases", "textStore.finalizeImport");
 570+ }
538571 }
539572
540573 public void finishSections() throws PersistenceException {
@@ -744,6 +777,18 @@
745778 endTask("finishIdReferences", "buildIdLinks:alias", n+" references");
746779 }
747780 //if (beginTask("finishIdReferences", "buildIdLinks:reference")) buildIdLinks(referenceTable, "target_name", "target");
 781+
 782+ if (idManager==null && propertyStore!=null && beginTask("finishIdReferences", "propertyStore.finishIdReferences")) {
 783+ propertyStore.finishIdReferences();
 784+ endTask("finishIdReferences", "propertyStore.finishIdReferences");
 785+ }
 786+
 787+ /*
 788+ if (idManager==null && textStore!=null && beginTask("finishIdReferences", "textStore.finishIdReferences")) {
 789+ textStore.finishIdReferences();
 790+ endTask("finishIdReferences", "textStore.finishIdReferences");
 791+ }
 792+ */
748793 }
749794
750795 public void finishAliases() throws PersistenceException {
@@ -771,7 +816,19 @@
772817 endTask("finishAliases", "resolveRedirects:broad", n+" entries");
773818 }
774819
 820+ if (propertyStore!=null && beginTask("finishAliases", "propertyStore.finishAliases")) {
 821+ propertyStore.finishAliases();
 822+ endTask("finishAliases", "propertyStore.finishAliases");
 823+ }
 824+
775825 /*
 826+ if (textStore!=null && beginTask("finishAliases", "textStore.finishAliases")) {
 827+ textStore.finishAliases();
 828+ endTask("finishAliases", "textStore.finishAliases");
 829+ }
 830+ */
 831+
 832+ /*
776833 //NOTE: way too late for that!
777834 if (beginTask("finishAliases", "resolveRedirects:section")) {
778835 int n = resolveRedirects(sectionTable, "concept_name", null);
@@ -1128,33 +1185,6 @@
11291186 return executeChunkedUpdate("buildMeanings", "buildMeanings", sql, group, linkTable, "target");
11301187 }
11311188
1132 - /**
1133 - * Builds id-references from name-references
1134 - */
1135 - protected int buildIdLinks(DatabaseTable table, String relNameField, String relIdField, int chunkFactor) throws PersistenceException {
1136 - DatabaseField nmField = table.getField(relNameField);
1137 - DatabaseField idField = table.getField(relIdField);
1138 -
1139 - if (!(nmField instanceof ReferenceField)) throw new IllegalArgumentException(relNameField+" is not a reference field in table "+table.getName());
1140 - if (!(idField instanceof ReferenceField)) throw new IllegalArgumentException(relIdField+" is not a reference field in table "+table.getName());
1141 -
1142 - String nmTable = ((ReferenceField)nmField).getTargetTable();
1143 - String idTable = ((ReferenceField)idField).getTargetTable();
1144 -
1145 - if (!nmTable.equals(idTable)) throw new IllegalArgumentException(relNameField+" and "+relIdField+" in table "+table.getName()+" do not reference the same table: "+nmTable+" != "+idTable);
1146 - DatabaseTable target = getTable(nmTable);
1147 -
1148 - String targetNameField = ((ReferenceField)nmField).getTargetField();
1149 - String targetIdField = ((ReferenceField)idField).getTargetField();
1150 -
1151 - String sql = "UPDATE "+table.getSQLName()+" as R JOIN "+target.getSQLName()+" as E "
1152 - + " ON R."+relNameField+" = E."+targetNameField+" "
1153 - + " SET R."+relIdField+" = E."+targetIdField+" ";
1154 - String where = " R."+relIdField+" IS NULL";
1155 -
1156 - return executeChunkedUpdate("buildIdLinks", table.getName()+"."+relNameField, sql, where, target, targetIdField, chunkFactor);
1157 - }
1158 -
11591189 /////////////////////////////////////////////////////////////////////////////////////////////
11601190 protected class DatabaseLocalStatisticsStoreBuilder extends DatabaseStatisticsStoreBuilder {
11611191 protected EntityTable termTable;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/IncrementalStoreBuilder.java
@@ -2,7 +2,7 @@
33
44 import de.brightbyte.util.PersistenceException;
55
6 -public interface IncrementalStoreBuilder {
 6+public interface IncrementalStoreBuilder extends WikiWordStoreBuilder {
77
88 public void deleteDataAfter(int delAfter, boolean inclusive) throws PersistenceException;
99
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java
@@ -18,7 +18,7 @@
1919 * (generally by a WikiTextAnalyzer) may be written to. It may be backed by
2020 * a RDBMS, or some other way of storing the data.
2121 */
22 -public interface LocalConceptStoreBuilder extends WikiWordConceptStoreBuilder<LocalConcept>, IncrementalStoreBuilder {
 22+public interface LocalConceptStoreBuilder extends WikiWordConceptStoreBuilder<LocalConcept>, IncrementalStoreBuilder, ConceptBasedStoreBuilder {
2323
2424 public abstract void storeDefinition(int rcId, int conceptId, String definition)
2525 throws PersistenceException;
@@ -90,8 +90,6 @@
9191 public void finishSections() throws PersistenceException;
9292 public void finishBadLinks() throws PersistenceException;
9393 public void finishMissingConcepts() throws PersistenceException;
94 - public void finishIdReferences() throws PersistenceException;
95 - public void finishAliases() throws PersistenceException;
9694 public void finishRelations() throws PersistenceException;
9795 public void finishMeanings() throws PersistenceException;
9896 //public void finishConceptInfo() throws PersistenceException;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseTextStoreBuilder.java
@@ -4,13 +4,15 @@
55 import java.sql.SQLException;
66
77 import de.brightbyte.application.Agenda;
 8+import de.brightbyte.data.PersistentIdManager;
89 import de.brightbyte.db.EntityTable;
910 import de.brightbyte.db.Inserter;
 11+import de.brightbyte.db.RelationTable;
1012 import de.brightbyte.util.PersistenceException;
1113 import de.brightbyte.wikiword.ConceptType;
1214 import de.brightbyte.wikiword.Corpus;
13 -import de.brightbyte.wikiword.ResourceType;
1415 import de.brightbyte.wikiword.TweakSet;
 16+import de.brightbyte.wikiword.schema.AliasScope;
1517 import de.brightbyte.wikiword.schema.LocalConceptStoreSchema;
1618 import de.brightbyte.wikiword.schema.TextStoreSchema;
1719
@@ -59,6 +61,8 @@
6062
6163 plainTextTable = (EntityTable)plainTextInserter.getTable();
6264 rawTextTable = (EntityTable)rawTextInserter.getTable();
 65+
 66+ //this.idManager = idManager;
6367 }
6468
6569 @Override
@@ -67,21 +71,22 @@
6872 deleteDataFrom(rcId, op, plainTextTable, "id");
6973 }
7074
 75+ /*
7176 protected int getResourceId(String title) throws SQLException {
7277 String sql = "select id from "+localConceptDatabase.getSQLTableName("resource")
7378 +" where name = "+localConceptDatabase.quoteString(title);
7479 return (Integer) localConceptDatabase.executeSingleValueQuery("getResourceId", sql);
75 - }
76 -
 80+ }*/
 81+
7782 /**
7883 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeRawText(int, java.lang.String)
7984 */
80 - public void storeRawText(int textId, String title, String text) throws PersistenceException {
 85+ public void storeRawText(int rcId, String title, String text) throws PersistenceException {
8186 try {
8287 if (rawTextInserter==null) rawTextInserter = rawTextTable.getInserter();
83 - int rcId = getResourceId(title); //TODO: use join?
 88+ //if (rcId<=0) rcId = getResourceId(title); //TODO: use join?
8489
85 - rawTextInserter.updateInt("id", textId);
 90+ rawTextInserter.updateInt("id", rcId);
8691 rawTextInserter.updateInt("resource", rcId);
8792 rawTextInserter.updateString("text", text);
8893 rawTextInserter.updateRow();
@@ -93,12 +98,12 @@
9499 /**
95100 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storePlainText(int, java.lang.String)
96101 */
97 - public void storePlainText(int textId, String title, String text) throws PersistenceException {
 102+ public void storePlainText(int rcId, String title, String text) throws PersistenceException {
98103 try {
99104 if (plainTextInserter==null) plainTextInserter = plainTextTable.getInserter();
100 - int rcId = getResourceId(title); //TODO: use join?
 105+// if (rcId<=0) rcId = getResourceId(title); //TODO: use join?
101106
102 - plainTextInserter.updateInt("id", textId);
 107+ plainTextInserter.updateInt("id", rcId);
103108 plainTextInserter.updateInt("resource", rcId);
104109 plainTextInserter.updateString("text", text);
105110 plainTextInserter.updateRow();
@@ -107,6 +112,7 @@
108113 }
109114 }
110115
 116+ /*
111117 public void storePlainText(int textId, String name, ResourceType ptype, String text) throws PersistenceException {
112118 storePlainText(textId, name, text);
113119 }
@@ -114,7 +120,7 @@
115121 public void storeRawText(int textId, String name, ResourceType ptype, String text) throws PersistenceException {
116122 storeRawText(textId, name, text);
117123 }
118 -
 124+ */
119125 public ConceptType getConceptType(int type) {
120126 return localConceptDatabase.getConceptType(type);
121127 }
@@ -122,4 +128,21 @@
123129 public Corpus getCorpus() {
124130 return ((TextStoreSchema)database).getCorpus();
125131 }
 132+
 133+ /*
 134+ public void finishAliases() throws PersistenceException {
 135+ if (beginTask("DatabaseTextStoreBuilder.finishAliases", "resolveRedirects:property")) {
 136+ RelationTable aliasTable = (RelationTable)database.getTable("alias");
 137+ int n = resolveRedirects(aliasTable, rawTextTable, "concept_name", idManager!=null ? "concept" : null, AliasScope.REDIRECT, 3, null, null);
 138+ endTask("DatabaseTextStoreBuilder.finishAliases", "resolveRedirects:property", n+" entries");
 139+ }
 140+ }
 141+
 142+ public void finishIdReferences() throws PersistenceException {
 143+ if (idManager==null && beginTask("DatabaseTextStoreBuilder.finishIdReferences", "buildIdLinks:property")) {
 144+ int n = buildIdLinks(rawTextTable, "concept_name", "concept", 1);
 145+ endTask("DatabaseTextStoreBuilder.finishIdReferences", "buildIdLinks:property", n+" references");
 146+ }
 147+ }
 148+ */
126149 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabasePropertyStoreBuilder.java
@@ -4,6 +4,7 @@
55 import java.sql.SQLException;
66
77 import de.brightbyte.application.Agenda;
 8+import de.brightbyte.data.PersistentIdManager;
89 import de.brightbyte.db.Inserter;
910 import de.brightbyte.db.RelationTable;
1011 import de.brightbyte.util.PersistenceException;
@@ -17,12 +18,13 @@
1819
1920 protected RelationTable propertyTable;
2021 protected Inserter propertyInserter;
21 - private LocalConceptStoreSchema conceptStoreSchema;
 22+ protected LocalConceptStoreSchema conceptStoreSchema;
 23+ protected PersistentIdManager idManager;
2224
2325 public DatabasePropertyStoreBuilder(Corpus corpus, Connection connection, TweakSet tweaks) throws SQLException, PersistenceException {
2426 this(new LocalConceptStoreSchema(corpus, connection, tweaks, true),
2527 new PropertyStoreSchema(corpus, connection, tweaks, true),
26 - tweaks, null);
 28+ null, tweaks, null);
2729 }
2830
2931 public DatabasePropertyStoreBuilder(DatabaseLocalConceptStoreBuilder conceptStore, TweakSet tweaks) throws SQLException, PersistenceException {
@@ -32,11 +34,12 @@
3335 new PropertyStoreSchema(conceptStore.getCorpus(),
3436 conceptStore.getDatabaseAccess().getConnection(),
3537 tweaks, true),
 38+ conceptStore.idManager,
3639 tweaks,
3740 conceptStore.getAgenda());
3841 }
3942
40 - protected DatabasePropertyStoreBuilder(LocalConceptStoreSchema conceptStoreSchema, PropertyStoreSchema database, TweakSet tweaks, Agenda agenda) throws SQLException, PersistenceException {
 43+ protected DatabasePropertyStoreBuilder(LocalConceptStoreSchema conceptStoreSchema, PropertyStoreSchema database, PersistentIdManager idManager, TweakSet tweaks, Agenda agenda) throws SQLException, PersistenceException {
4144 super(database, tweaks, agenda);
4245
4346 //this.conceptStore = conceptStore;
@@ -45,6 +48,7 @@
4649 this.propertyTable = (RelationTable)propertyInserter.getTable();
4750
4851 this.conceptStoreSchema = conceptStoreSchema;
 52+ this.idManager = idManager;
4953 }
5054
5155 @Override
@@ -62,16 +66,6 @@
6367 super.flush();
6468 }
6569
66 - protected int getConceptId(String title) throws SQLException {
67 - //String sql = "select id from "+localConceptDatabase.getSQLTableName("resource")
68 - // +" where name = "+localConceptDatabase.quoteString(title);
69 - //return (Integer) localConceptDatabase.executeSingleValueQuery("getResourceId", sql);
70 -
71 - //TODO: get concept id
72 - throw new UnsupportedOperationException();
73 - //return -1;
74 - }
75 -
7670 /**
7771 * @see de.brightbyte.wikiword.store.builder.LocalConceptStoreBuilder#storeRawText(int, java.lang.String)
7872 */
@@ -80,7 +74,8 @@
8175 //int cId = getConceptId(name); //TODO: use join? //FIXME: when not provided
8276
8377 propertyInserter.updateInt("resource", resourceId);
84 - if (concept>0) propertyInserter.updateInt("concept", concept); //FIXME: id cache!
 78+ if (concept>0) propertyInserter.updateInt("concept", concept);
 79+ else if (idManager!=null) propertyInserter.updateInt("concept", idManager.aquireId(name));
8580 propertyInserter.updateString("concept_name", name);
8681 propertyInserter.updateString("property", property);
8782 propertyInserter.updateString("value", value);
@@ -108,12 +103,17 @@
109104 }
110105
111106 public void finishAliases() throws PersistenceException {
112 -
113 - if (beginTask("finishAliases", "resolveRedirects:property")) {
 107+ if (beginTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property")) {
114108 RelationTable aliasTable = (RelationTable)conceptStoreSchema.getTable("alias");
115 -
116 - int n = resolveRedirects(aliasTable, propertyTable, "concept_name", "concept", AliasScope.REDIRECT, 3, null, null);
117 - endTask("finishAliases", "resolveRedirects:property", n+" entries");
 109+ int n = resolveRedirects(aliasTable, propertyTable, "concept_name", idManager!=null ? "concept" : null, AliasScope.REDIRECT, 3, null, null);
 110+ endTask("DatabasePropertyStoreBuilder.finishAliases", "resolveRedirects:property", n+" entries");
118111 }
119112 }
 113+
 114+ public void finishIdReferences() throws PersistenceException {
 115+ if (idManager==null && beginTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property")) {
 116+ int n = buildIdLinks(propertyTable, "concept_name", "concept", 1);
 117+ endTask("DatabasePropertyStoreBuilder.finishIdReferences", "buildIdLinks:property", n+" references");
 118+ }
 119+ }
120120 }
\ No newline at end of file

Status & tagging log