r64309 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:< r64308 | r64309 | r64310 >
Date:20:24, 28 March 2010
Author:daniel
Status:deferred
Tags:
Comment:
refactored DAO model (untested, but compiles now)
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/ConceptRelations.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/LocalConcept.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/TermReference.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/LocalConceptSkosProperties.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/RdfOutput.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/CalculatedProximityStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseGlobalConceptStore.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseConceptInfoStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java (modified) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/WikiWordConceptStoreBuilder.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/CalculatedProximityStore.java
@@ -42,7 +42,7 @@
4343 double prox = getProximity(centerFeatures, f.getFeatureVector());
4444 if (prox<minProximity) continue;
4545
46 - return newConcept(f.getId(), f.getName(), 1, prox);
 46+ return newConcept(f.getId(), f.getConcept().getName(), f.getConcept().getType(), 1, prox);
4747 } ;
4848
4949 return null;
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java
@@ -31,9 +31,6 @@
3232 import de.brightbyte.wikiword.schema.LocalConceptStoreSchema;
3333 import de.brightbyte.wikiword.schema.LocalStatisticsStoreSchema;
3434 import de.brightbyte.wikiword.schema.StatisticsStoreSchema;
35 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore;
36 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseStatisticsStore;
37 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore.ConceptFactory;
3835
3936 /**
4037 * A LocalConceptStore implemented based upon a {@link de.brightbyte.db.DatabaseSchema} object,
@@ -295,7 +292,7 @@
296293 }
297294
298295 @Override
299 - protected LocalConcept newConcept(Map<String, Object> m) {
 296+ protected LocalConcept newConcept(Map<String, Object> m) throws PersistenceException {
300297 int id = asInt(m.get("cId"));
301298 String name = asString(m.get("cName"));
302299 ConceptType type = corpus.getConceptTypes().getType(asInt(m.get("cType")));
@@ -317,7 +314,7 @@
318315 LocalConcept[] outlinks = LocalConcept.parseList( asString(m.get("rOutlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry );
319316 LocalConcept[] broader = LocalConcept.parseList( asString(m.get("rBroader")), getConceptFactory(), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry );
320317 LocalConcept[] narrower = LocalConcept.parseList( asString(m.get("rNarrower")), getConceptFactory(), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry );
321 - TranslationReference[] langlinks = TranslationReference.parseList( asString(m.get("rLanglinks")), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry );
 318+ LocalConcept[] langlinks = LocalConcept.parseList( asString(m.get("rLanglinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry );
322319 LocalConcept[] similar = LocalConcept.parseList( asString(m.get("rSimilar")), getConceptFactory(), ((ConceptInfoStoreSchema)database).similarReferenceListEntry );
323320 LocalConcept[] related = LocalConcept.parseList( asString(m.get("rRelated")), getConceptFactory(), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry );
324321 TermReference[] terms = TermReference.parseList( asString(m.get("dTerms")), getConceptFactory(), ((ConceptInfoStoreSchema)database).termReferenceListEntry );
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseGlobalConceptStore.java
@@ -28,9 +28,6 @@
2929 import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema;
3030 import de.brightbyte.wikiword.schema.GlobalConceptStoreSchema;
3131 import de.brightbyte.wikiword.schema.StatisticsStoreSchema;
32 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore;
33 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseStatisticsStore;
34 -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore.ConceptFactory;
3532
3633 /**
3734 * A GlobalConceptStore implemented based upon a {@link de.brightbyte.db.DatabaseSchema} object,
@@ -268,18 +265,22 @@
269266
270267 Corpus[] languages = ((GlobalConceptStoreSchema)DatabaseGlobalConceptStore.this.database).getLanguages(langBits);
271268
272 - GlobalConcept ref = new GlobalConcept(id, name, cardinality, relevance);
273 - GlobalConcept[] inlinks = GlobalConcept.parseList( asString(m.get("rInlinks")), ((ConceptInfoStoreSchema)database).inLinksReferenceListEntry );
274 - GlobalConcept[] outlinks = GlobalConcept.parseList( asString(m.get("rOutlinks")), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry );
275 - GlobalConcept[] broader = GlobalConcept.parseList( asString(m.get("rBroader")), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry );
276 - GlobalConcept[] narrower = GlobalConcept.parseList( asString(m.get("rNarrower")), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry );
277 - TranslationReference[] langlinks = TranslationReference.parseList( asString(m.get("rLanglinks")), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry );
278 - GlobalConcept[] similar = GlobalConcept.parseList( asString(m.get("rSimilar")), ((ConceptInfoStoreSchema)database).similarReferenceListEntry );
279 - GlobalConcept[] related = GlobalConcept.parseList( asString(m.get("rRelated")), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry );
 269+ GlobalConcept concept = new GlobalConcept(getDatasetIdentifier(), id, type);
 270+ concept.setCardinality(cardinality);
 271+ concept.setRelevance(relevance);
280272
 273+ GlobalConcept[] inlinks = GlobalConcept.parseList( asString(m.get("rInlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).inLinksReferenceListEntry );
 274+ GlobalConcept[] outlinks = GlobalConcept.parseList( asString(m.get("rOutlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry );
 275+ GlobalConcept[] broader = GlobalConcept.parseList( asString(m.get("rBroader")), getConceptFactory(), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry );
 276+ GlobalConcept[] narrower = GlobalConcept.parseList( asString(m.get("rNarrower")), getConceptFactory(), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry );
 277+ GlobalConcept[] langlinks = GlobalConcept.parseList( asString(m.get("rLanglinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry );
 278+ GlobalConcept[] similar = GlobalConcept.parseList( asString(m.get("rSimilar")), getConceptFactory(), ((ConceptInfoStoreSchema)database).similarReferenceListEntry );
 279+ GlobalConcept[] related = GlobalConcept.parseList( asString(m.get("rRelated")), getConceptFactory(), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry );
 280+
281281 ConceptRelations<GlobalConcept> relations = new ConceptRelations<GlobalConcept>(broader, narrower, inlinks, outlinks, similar, related, langlinks);
 282+ concept.setRelations(relations);
282283
283 - return new GlobalConcept(ref, getDatasetIdentifier(), languages, type, DatabaseGlobalConceptStore.this, relations, null);
 284+ return concept;
284285 } catch (SQLException e) {
285286 throw new PersistenceException(e);
286287 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java
@@ -13,22 +13,24 @@
1414 import de.brightbyte.wikiword.DatasetIdentifier;
1515 import de.brightbyte.wikiword.TweakSet;
1616 import de.brightbyte.wikiword.model.WikiWordConcept;
 17+import static de.brightbyte.wikiword.model.WikiWordConcept.*;
1718
1819 public class ConceptInfoStoreSchema extends WikiWordStoreSchema {
1920 public final String referenceSeparator; //info record separator
2021 public final String referenceFieldSeparator; //info field separator
 22+ public final String languagePrefixSeparator; //language prefix separator
2123
22 - public final Pattern referenceSeparatorPattern; //info record separator
23 - public final Pattern referenceFieldSeparatorPattern; //info field separator
 24+ public final Pattern conceptSeparatorPattern; //info record separator
 25+ public final Pattern conceptFieldSeparatorPattern; //info field separator
 26+ public final Pattern languagePrefixPattern; //language prefix separator
2427
25 - public class ReferenceListEntrySpec extends WikiWordConcept.ListFormatSpec {
 28+ public class ConceptListEntrySpec extends WikiWordConcept.ListFormatSpec {
2629
2730 public final String joinField;
2831 public final String valueExpression;
2932
30 - public ReferenceListEntrySpec(String field, String expression, boolean useId, boolean useName, boolean useCardinality, boolean useRelevance) {
31 - super(referenceSeparatorPattern, referenceFieldSeparatorPattern, useId, useName,
32 - useCardinality, useRelevance);
 33+ public ConceptListEntrySpec(String field, String expression, int flags) {
 34+ super(conceptSeparatorPattern, conceptFieldSeparatorPattern, languagePrefixPattern, flags);
3335
3436 this.joinField = field;
3537 this.valueExpression = expression;
@@ -36,18 +38,18 @@
3739
3840 }
3941
40 - public final ReferenceListEntrySpec langlinkReferenceListEntry;
 42+ public final ConceptListEntrySpec langlinkReferenceListEntry;
4143
42 - public final ReferenceListEntrySpec termReferenceListEntry;
43 - public final ReferenceListEntrySpec broaderReferenceListEntry;
44 - public final ReferenceListEntrySpec narrowerReferenceListEntry;
45 - public final ReferenceListEntrySpec inLinksReferenceListEntry;
46 - public final ReferenceListEntrySpec outLinksReferenceListEntry;
47 - public final ReferenceListEntrySpec similarReferenceListEntry;
48 - public final ReferenceListEntrySpec relatedReferenceListEntry;
49 - public final ReferenceListEntrySpec related2ReferenceListEntry;
50 - public final ReferenceListEntrySpec featureReferenceListEntry;
51 - public final ReferenceListEntrySpec proximityReferenceListEntry;
 44+ public final ConceptListEntrySpec termReferenceListEntry;
 45+ public final ConceptListEntrySpec broaderReferenceListEntry;
 46+ public final ConceptListEntrySpec narrowerReferenceListEntry;
 47+ public final ConceptListEntrySpec inLinksReferenceListEntry;
 48+ public final ConceptListEntrySpec outLinksReferenceListEntry;
 49+ public final ConceptListEntrySpec similarReferenceListEntry;
 50+ public final ConceptListEntrySpec relatedReferenceListEntry;
 51+ public final ConceptListEntrySpec related2ReferenceListEntry;
 52+ public final ConceptListEntrySpec featureReferenceListEntry;
 53+ public final ConceptListEntrySpec proximityReferenceListEntry;
5254
5355 protected EntityTable conceptInfoTable;
5456 protected EntityTable conceptDescriptionTable;
@@ -65,52 +67,54 @@
6668
6769 referenceSeparator = tweaks.getTweak("dbstore.cacheReferenceSeparator", "\u001E"); //ASCII Record Separator
6870 referenceFieldSeparator = tweaks.getTweak("dbstore.cacheReferenceFieldSeparator", "\u001F"); //ASCII Field Separator
69 - referenceSeparatorPattern = Pattern.compile(referenceSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0"));
70 - referenceFieldSeparatorPattern = Pattern.compile(referenceFieldSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0"));
 71+ languagePrefixSeparator = tweaks.getTweak("dbstore.languagePrefixSeparator", ":"); //ASCII Field Separator
 72+ conceptSeparatorPattern = Pattern.compile(referenceSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0"));
 73+ conceptFieldSeparatorPattern = Pattern.compile(referenceFieldSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0"));
 74+ languagePrefixPattern = Pattern.compile(languagePrefixSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0"));
7175
7276 langlinkReferenceListEntry =
73 - new ReferenceListEntrySpec("language, target", "concat(language, ':', target)",
74 - false, true, false, false);
 77+ new ConceptListEntrySpec("language, target", "concat(language, ':', target)",
 78+ LIST_FORMAT_USE_NAME | LIST_FORMAT_USE_LANGUAGE_PREFIX );
7579
7680 termReferenceListEntry =
77 - new ReferenceListEntrySpec("term_text", fields("term_text", "freq"),
78 - false, true, true, false);
 81+ new ConceptListEntrySpec("term_text", fields("term_text", "freq"),
 82+ LIST_FORMAT_USE_NAME | LIST_FORMAT_USE_CARDINALITY );
7983
8084 broaderReferenceListEntry =
81 - new ReferenceListEntrySpec("broad", cacheNames ? fields("broad", "broad_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("broad", "if (lhs is null or lhs = 0, 0, 1/lhs)"),
82 - true, cacheNames, false, true);
 85+ new ConceptListEntrySpec("broad", cacheNames ? fields("broad", "broad_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("broad", "if (lhs is null or lhs = 0, 0, 1/lhs)"),
 86+ LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE );
8387
8488 narrowerReferenceListEntry =
85 - new ReferenceListEntrySpec("narrow", cacheNames ? fields("narrow", "narrow_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("narrow", "if (lhs is null or lhs = 0, 0, 1/lhs)"),
86 - true, cacheNames, false, true);
 89+ new ConceptListEntrySpec("narrow", cacheNames ? fields("narrow", "narrow_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("narrow", "if (lhs is null or lhs = 0, 0, 1/lhs)"),
 90+ LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE);
8791
8892 inLinksReferenceListEntry =
89 - new ReferenceListEntrySpec("anchor", cacheNames ? fields("anchor", "anchor_name", "idf") : fields("anchor", "idf"),
90 - true, cacheNames, false, true);
 93+ new ConceptListEntrySpec("anchor", cacheNames ? fields("anchor", "anchor_name", "idf") : fields("anchor", "idf"),
 94+ LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE );
9195
9296 outLinksReferenceListEntry =
93 - new ReferenceListEntrySpec("target", cacheNames ? fields("target", "target_name", "idf") : fields("target", "idf"),
94 - true, cacheNames, false, true);
 97+ new ConceptListEntrySpec("target", cacheNames ? fields("target", "target_name", "idf") : fields("target", "idf"),
 98+ LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE );
9599
96100 similarReferenceListEntry =
97 - new ReferenceListEntrySpec("concept2", fields("concept2", "langmatch"), //TODO: frequency for similar from langref(!)
98 - true, false, true, false); //TODO: name?... in relation table?... //XXX: why no score
 101+ new ConceptListEntrySpec("concept2", fields("concept2", "langmatch"), //TODO: frequency for similar from langref(!)
 102+ LIST_FORMAT_USE_ID | LIST_FORMAT_USE_CARDINALITY); //TODO: name?... in relation table?... //XXX: why no score
99103
100104 relatedReferenceListEntry =
101 - new ReferenceListEntrySpec("concept2", "concept2",
102 - true, false, false, false); //TODO: name?... in relation table?... //XXX: why no score
 105+ new ConceptListEntrySpec("concept2", "concept2",
 106+ LIST_FORMAT_USE_ID); //TODO: name?... in relation table?... //XXX: why no score
103107
104108 related2ReferenceListEntry =
105 - new ReferenceListEntrySpec("concept1","concept1",
106 - true, false, false, false); //TODO: name?... in relation table?... //XXX: why no score
 109+ new ConceptListEntrySpec("concept1","concept1",
 110+ LIST_FORMAT_USE_ID); //TODO: name?... in relation table?... //XXX: why no score
107111
108112 featureReferenceListEntry =
109 - new ReferenceListEntrySpec("target",fields("target", "weight"),
110 - true, false, false, true);
 113+ new ConceptListEntrySpec("target",fields("target", "weight"),
 114+ LIST_FORMAT_USE_ID | LIST_FORMAT_USE_RELEVANCE );
111115
112116 proximityReferenceListEntry =
113 - new ReferenceListEntrySpec("target",fields("target", "proximity"),
114 - true, false, false, true);
 117+ new ConceptListEntrySpec("target",fields("target", "proximity"),
 118+ LIST_FORMAT_USE_ID | LIST_FORMAT_USE_RELEVANCE );
115119
116120 init(tweaks, description);
117121 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/LocalConcept.java
@@ -1,52 +1,19 @@
22 package de.brightbyte.wikiword.model;
33
4 -import java.text.Collator;
5 -import java.util.Comparator;
6 -import java.util.Locale;
7 -
84 import de.brightbyte.wikiword.ConceptType;
95 import de.brightbyte.wikiword.Corpus;
106
117 public class LocalConcept extends WikiWordConcept {
128
13 - public static class ByName implements Comparator<LocalConcept> {
14 - protected Collator collator;
15 -
16 - public ByName() {
17 - this((Collator)null);
18 - }
19 -
20 - public ByName(Locale locale) {
21 - this.collator = Collator.getInstance(locale);
22 - }
23 -
24 - public ByName(Collator collator) {
25 - this.collator = collator;
26 - }
27 -
28 - public int compare(LocalConcept a, LocalConcept b) {
29 - if (collator==null) return a.getName().compareTo(b.getName());
30 - else return collator.compare(a.getName(), b.getName());
31 - }
32 - };
33 -
34 - protected String name;
359 protected String definition;
3610 protected String language;
3711 protected WikiWordResource resource;
3812
3913 public LocalConcept(Corpus corpus, int id, ConceptType type, String name) {
4014 super(corpus, id, type);
41 -
42 - this.name = name;
 15+ setName(name);
4316 }
4417
45 - @Override
46 - public String toString() {
47 - if (name!=null) return "#"+id+":[["+name+"]]";
48 - else return "#"+id;
49 - }
50 -
5118 public Corpus getCorpus() {
5219 return (Corpus)getDatasetIdentifier();
5320 }
@@ -79,13 +46,4 @@
8047 this.language = language;
8148 }
8249
83 - public String getName() {
84 - return name;
85 - }
86 -
87 - public void setName(String name) {
88 - if (this.name!=null) throw new IllegalStateException("property already initialized");
89 - this.name = name;
90 - }
91 -
9250 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/TermReference.java
@@ -1,7 +1,11 @@
22 package de.brightbyte.wikiword.model;
33
 4+import de.brightbyte.util.PersistenceException;
 5+import de.brightbyte.wikiword.model.WikiWordConcept.Factory;
 6+import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ConceptListEntrySpec;
47
58
 9+
610 public class TermReference {
711
812 private String term;
@@ -67,4 +71,21 @@
6872 return true;
6973 }
7074
 75+ public static TermReference[] parseList(String s, Factory<LocalConcept> factory, ConceptListEntrySpec spec) throws PersistenceException {
 76+ LocalConcept[] concepts = WikiWordConcept.parseList(s, factory, spec); //XXX: this is a terrible, terrible hack.
 77+ TermReference[] terms = new TermReference[concepts.length];
 78+
 79+ for (int i=0; i<terms.length; i++) {
 80+ WikiWordConcept dummy = concepts[i];
 81+
 82+ String term = dummy.getName(); //UGHA!
 83+ double score = dummy.getCardinality();
 84+
 85+ WikiWordConcept target = factory.newInstance(dummy.getId(), null, dummy.getType());
 86+ terms[i] = new TermReference(term, target, score);
 87+ }
 88+
 89+ return terms;
 90+ }
 91+
7192 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java
@@ -1,7 +1,9 @@
22 package de.brightbyte.wikiword.model;
33
 4+import java.text.Collator;
45 import java.util.Arrays;
56 import java.util.Comparator;
 7+import java.util.Locale;
68 import java.util.regex.Pattern;
79
810 import de.brightbyte.data.measure.Measure;
@@ -10,24 +12,61 @@
1113 import de.brightbyte.wikiword.DatasetIdentifier;
1214
1315 public abstract class WikiWordConcept {
14 - protected DatasetIdentifier dataset;
15 - protected ConceptType type;
 16+ public static class ByName implements Comparator<WikiWordConcept> {
 17+ protected Collator collator;
 18+
 19+ public ByName() {
 20+ this((Collator)null);
 21+ }
 22+
 23+ public ByName(Locale locale) {
 24+ this.collator = Collator.getInstance(locale);
 25+ }
 26+
 27+ public ByName(Collator collator) {
 28+ this.collator = collator;
 29+ }
 30+
 31+ public int compare(WikiWordConcept a, WikiWordConcept b) {
 32+ if (collator==null) return a.getName().compareTo(b.getName());
 33+ else return collator.compare(a.getName(), b.getName());
 34+ }
 35+ };
1636
17 - protected int id;
 37+ private DatasetIdentifier dataset;
 38+ private ConceptType type;
 39+
 40+ private int id;
 41+ private String name;
1842
19 - protected int cardinality = 1;
20 - protected double relevance = 1;
 43+ private int cardinality = 1;
 44+ private double relevance = 1;
2145
22 - protected ConceptRelations relations;
23 - protected ConceptFeatures features;
24 - protected TermReference[] terms;
 46+ private ConceptRelations relations;
 47+ private ConceptFeatures features;
 48+ private TermReference[] terms;
2549
2650 public WikiWordConcept(DatasetIdentifier dataset, int id, ConceptType type) {
2751 this.id = id;
2852 this.dataset = dataset;
2953 this.type = type;
3054 }
 55+
 56+ @Override
 57+ public String toString() {
 58+ if (name!=null) return "#"+id+":[["+name+"]]";
 59+ else return "#"+id;
 60+ }
 61+
 62+ public String getName() {
 63+ return name;
 64+ }
3165
 66+ public void setName(String name) {
 67+ if (this.name!=null) throw new IllegalStateException("property already initialized");
 68+ this.name = name;
 69+ }
 70+
3271 public DatasetIdentifier getDatasetIdentifier() {
3372 return dataset;
3473 }
@@ -108,13 +147,7 @@
109148
110149 return id == other.id;
111150 }
112 -
113151
114 - @Override
115 - public String toString() {
116 - return "#"+id;
117 - }
118 -
119152 public static final Measure<WikiWordConcept> theCardinality = new Measure<WikiWordConcept>(){
120153 public double measure(WikiWordConcept r) {
121154 return r.getCardinality();
@@ -144,30 +177,43 @@
145178 else return (int)(rb-ra) +1;
146179 }
147180 };
148 -
 181+
 182+ public static final Comparator<WikiWordConcept> byName = new ByName();
 183+
149184 public static interface Factory<T extends WikiWordConcept> {
150185 public T newInstance(int id, String name, ConceptType type) throws PersistenceException;
151186 public T newInstance(int id, String name, ConceptType type, int card, double rel) throws PersistenceException;
152187 public T[] newArray(int size);
153188 }
154189
 190+ public static final int LIST_FORMAT_USE_ID = 0x0001;
 191+ public static final int LIST_FORMAT_USE_NAME = 0x0001;
 192+ public static final int LIST_FORMAT_USE_CARDINALITY = 0x0010;
 193+ public static final int LIST_FORMAT_USE_RELEVANCE = 0x0020;
 194+ public static final int LIST_FORMAT_USE_LANGUAGE_PREFIX= 0x0100;
 195+
155196 public static class ListFormatSpec {
156197 public final boolean useId;
157198 public final boolean useName;
158199 public final boolean useCardinality;
159200 public final boolean useRelevance;
 201+ public final boolean useLanguagePrefix;
160202 public final Pattern referenceSeparator;
161203 public final Pattern fieldSeparator;
 204+ public final Pattern prefixSeparator;
162205 public final int width;
163206
164 - public ListFormatSpec(Pattern referenceSeparator, Pattern fieldSeparator, boolean useId, boolean useName, boolean useCardinality, boolean useRelevance) {
 207+ public ListFormatSpec(Pattern referenceSeparator, Pattern fieldSeparator, Pattern prefixSeparator, int flags) {
165208 this.referenceSeparator = referenceSeparator;
166209 this.fieldSeparator = fieldSeparator;
167 - this.useId = useId;
168 - this.useName = useName;
169 - this.useCardinality = useCardinality;
170 - this.useRelevance = useRelevance;
 210+ this.prefixSeparator = prefixSeparator;
171211
 212+ useId = (flags & LIST_FORMAT_USE_ID) > 0;
 213+ useName = (flags & LIST_FORMAT_USE_NAME) > 0;
 214+ useCardinality = (flags & LIST_FORMAT_USE_CARDINALITY) > 0;
 215+ useRelevance = (flags & LIST_FORMAT_USE_RELEVANCE) > 0;
 216+ useLanguagePrefix = (flags & LIST_FORMAT_USE_LANGUAGE_PREFIX) > 0;
 217+
172218 int w = 0;
173219 if (useId) w++;
174220 if (useName) w++;
@@ -208,8 +254,19 @@
209255 if (rr.length>=i+1 && spec.useName) name = rr[i++];
210256 if (rr.length>=i+1 && spec.useCardinality) cardinality = rr[i].length()==0 ? -1 : Integer.parseInt(rr[i++]);
211257 if (rr.length>=i+1 && spec.useRelevance) relevance = rr[i].length()==0 ? -1 : Double.parseDouble(rr[i++]);
 258+ String lang = null;
212259
213 - a[c++] = factory.newInstance(id, name, null, cardinality, relevance);
 260+ if (spec.useLanguagePrefix) {
 261+ String[] nn = spec.prefixSeparator.split(name, 2);
 262+ if (nn.length>1) {
 263+ name = nn[1];
 264+ lang = nn[0];
 265+ }
 266+ }
 267+
 268+ T t = factory.newInstance(id, name, null, cardinality, relevance);
 269+ if (lang!=null && t instanceof LocalConcept) ((LocalConcept)t).setLanguage(lang);
 270+ a[c++] = t;
214271 }
215272
216273 if (c<a.length) { //should not happen, but might, if values get truncated
@@ -220,7 +277,7 @@
221278
222279 if (spec.useRelevance) Arrays.sort(a, byRelevance);
223280 else if (spec.useCardinality) Arrays.sort(a, byCardinality);
224 - //else if (spec.useName) Arrays.sort(a, byName);
 281+ else if (spec.useName) Arrays.sort(a, byName);
225282
226283 return a;
227284 }
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/ConceptRelations.java
@@ -1,6 +1,5 @@
22 package de.brightbyte.wikiword.model;
33
4 -import java.util.Map;
54
65 public class ConceptRelations<R extends WikiWordConcept> {
76
@@ -10,7 +9,7 @@
1110 protected R[] narrower;
1211 protected R[] similar;
1312 protected R[] related;
14 - protected Map<String, LocalConcept> langlinks;
 13+ protected R[] langlinks;
1514
1615 //TODO: inlinks, outlinks, coocc, co-coocc
1716
@@ -20,7 +19,7 @@
2120 R[] outlinks,
2221 R[] similar,
2322 R[] related,
24 - Map<String, LocalConcept> langlinks) {
 23+ R[] langlinks) {
2524
2625 if (inlinks==null) throw new NullPointerException();
2726 if (outlinks==null) throw new NullPointerException();
@@ -43,7 +42,7 @@
4443 return broader;
4544 }
4645
47 - public Map<String, LocalConcept> getLanglinks() {
 46+ public R[] getLanglinks() {
4847 return langlinks;
4948 }
5049
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/LocalConceptSkosProperties.java
@@ -49,7 +49,7 @@
5050 TermReference[] tt = concept.getTerms();
5151 for (TermReference t: tt) {
5252 //if (t.getName().equals(name)) continue; //NOTE: for prefLabel
53 - setLiteralProperty(about, skos.altLabel, t.getName(), lang);
 53+ setLiteralProperty(about, skos.altLabel, t.getTerm(), lang);
5454 }
5555
5656 //TODO: idf, lhs
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/RdfOutput.java
@@ -27,7 +27,7 @@
2828
2929 @SuppressWarnings("unchecked")
3030 public RdfOutput(WikiWordIdentifiers identifiers, String platform, Writer writer, String format, DatasetIdentifier ds) throws RdfException, PersistenceException {
31 - this(identifiers, (RdfPlatform<V, R, A, W>)RdfPlatforms.newPlatform(platform));
 31+ this(identifiers, (RdfPlatform<V, R, A, W>)(Object)RdfPlatforms.newPlatform(platform)); //XXX: ugly insane cast to avoid some compilers complaining.
3232 init(this.platform.newWriter(writer, format), ds);
3333 }
3434
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/WikiWordConceptStoreBuilder.java
@@ -2,7 +2,6 @@
33
44 import de.brightbyte.util.PersistenceException;
55 import de.brightbyte.wikiword.model.WikiWordConcept;
6 -import de.brightbyte.wikiword.model.WikiWordConceptReference;
76 import de.brightbyte.wikiword.store.WikiWordConceptStore;
87 import de.brightbyte.wikiword.store.WikiWordConceptStoreBase;
98
@@ -12,6 +11,6 @@
1312 public ConceptInfoStoreBuilder<T> getConceptInfoStoreBuilder() throws PersistenceException;
1413 public ProximityStoreBuilder getProximityStoreBuilder() throws PersistenceException;
1514
16 - public WikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> getConceptStore() throws PersistenceException;
 15+ public WikiWordConceptStore<T> getConceptStore() throws PersistenceException;
1716
1817 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java
@@ -19,8 +19,6 @@
2020 import de.brightbyte.wikiword.ExtractionRule;
2121 import de.brightbyte.wikiword.ResourceType;
2222 import de.brightbyte.wikiword.model.LocalConcept;
23 -import de.brightbyte.wikiword.model.LocalConceptReference;
24 -import de.brightbyte.wikiword.model.WikiWordConceptReference;
2523 import de.brightbyte.wikiword.schema.AliasScope;
2624 import de.brightbyte.wikiword.store.GroupNameTranslator;
2725 import de.brightbyte.wikiword.store.WikiWordConceptStore;
@@ -834,11 +832,11 @@
835833 log("+ storeWarning: rcId="+rcId+", problem="+problem+", details="+details);
836834 }
837835
838 - public WikiWordConceptStore<LocalConcept, WikiWordConceptReference<LocalConcept>> getConceptStore() throws PersistenceException {
 836+ public WikiWordConceptStore<LocalConcept> getConceptStore() throws PersistenceException {
839837 return null; //XXX...
840838 }
841839
842 - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException {
 840+ public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException {
843841 return null; //XXX...
844842 }
845843
@@ -846,7 +844,7 @@
847845 //noop
848846 }
849847
850 - public int processUnknownConcepts(CursorProcessor<LocalConceptReference> processor) throws PersistenceException {
 848+ public int processUnknownConcepts(CursorProcessor<LocalConcept> processor) throws PersistenceException {
851849 //noop
852850 return 0;
853851 }
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java
@@ -1,7 +1,5 @@
22 package de.brightbyte.wikiword.store.builder;
33
4 -import static de.brightbyte.db.DatabaseUtil.asString;
5 -
64 import java.io.File;
75 import java.sql.Connection;
86 import java.sql.ResultSet;
@@ -38,7 +36,6 @@
3937 import de.brightbyte.wikiword.TweakSet;
4038 import de.brightbyte.wikiword.builder.NameMaps;
4139 import de.brightbyte.wikiword.model.LocalConcept;
42 -import de.brightbyte.wikiword.model.LocalConceptReference;
4340 import de.brightbyte.wikiword.schema.AliasScope;
4441 import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema;
4542 import de.brightbyte.wikiword.schema.LocalConceptStoreSchema;
@@ -1453,16 +1450,18 @@
14541451 return new DatabaseLocalConceptStore((LocalConceptStoreSchema)database, tweaks);
14551452 }
14561453
1457 - protected static DatabaseDataSet.Factory<LocalConceptReference> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConceptReference>() {
1458 - public LocalConceptReference newInstance(ResultSet row) throws SQLException, PersistenceException {
 1454+ protected DatabaseDataSet.Factory<LocalConcept> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConcept>() {
 1455+ public LocalConcept newInstance(ResultSet row) throws SQLException, PersistenceException {
14591456 int id = row.getInt("id");
1460 - String name = asString(row.getObject("name"));
 1457+ String name = DatabaseUtil.asString(row.getObject("name"));
 1458+ //FIXME: type?!
14611459
1462 - return new LocalConceptReference(id, name, -1, -1);
 1460+ LocalConcept concept = new LocalConcept(getCorpus(), id, null, name);
 1461+ return concept;
14631462 }
14641463 };
14651464
1466 - public int processUnknownConcepts(final CursorProcessor<LocalConceptReference> processor) throws PersistenceException {
 1465+ public int processUnknownConcepts(final CursorProcessor<LocalConcept> processor) throws PersistenceException {
14671466 String sql = "SELECT * FROM "+conceptTable.getSQLName();
14681467 String where = "type = "+ConceptType.UNKNOWN.getCode();
14691468
@@ -1483,7 +1482,7 @@
14841483 return ds;
14851484 }*/
14861485
1487 - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException {
 1486+ public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException {
14881487 String sql = "SELECT * FROM "+conceptTable.getSQLName();
14891488 String where = "type = "+ConceptType.UNKNOWN.getCode();
14901489
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseConceptInfoStoreBuilder.java
@@ -12,7 +12,7 @@
1313 import de.brightbyte.wikiword.model.WikiWordConcept;
1414 import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema;
1515 import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema;
16 -import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec;
 16+import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ConceptListEntrySpec;
1717
1818 public abstract class DatabaseConceptInfoStoreBuilder<T extends WikiWordConcept>
1919 extends DatabaseWikiWordStoreBuilder
@@ -118,7 +118,7 @@
119119 protected int buildConceptPropertyCache(
120120 final DatabaseTable cacheTable, final String cacheIdField,
121121 final String propertyField, final String realtion, final String relConceptField,
122 - final ReferenceListEntrySpec spec, final boolean append, final String threshold,
 122+ final ConceptListEntrySpec spec, final boolean append, final String threshold,
123123 final int chunkFactor) throws PersistenceException {
124124
125125 final DatabaseTable relationTable = conceptStore.getDatabaseAccess().getTable(realtion);
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java
@@ -24,7 +24,6 @@
2525 import de.brightbyte.wikiword.ConceptType;
2626 import de.brightbyte.wikiword.TweakSet;
2727 import de.brightbyte.wikiword.model.WikiWordConcept;
28 -import de.brightbyte.wikiword.model.WikiWordConceptReference;
2928 import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema;
3029 import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore;
3130
@@ -425,16 +424,16 @@
426425
427426 protected abstract DatabaseStatisticsStoreBuilder newStatisticsStoreBuilder() throws SQLException, PersistenceException;
428427 protected abstract DatabaseConceptInfoStoreBuilder<T> newConceptInfoStoreBuilder() throws SQLException, PersistenceException;
429 - protected abstract DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> newConceptStore() throws SQLException, PersistenceException;
 428+ protected abstract DatabaseWikiWordConceptStore<T> newConceptStore() throws SQLException, PersistenceException;
430429 protected abstract DatabaseProximityStoreBuilder newProximityStoreBuilder() throws SQLException;
431430
432431 private DatabaseStatisticsStoreBuilder statsStore;
433432 private DatabaseProximityStoreBuilder proximityStore;
434433 private DatabaseConceptInfoStoreBuilder<T> infoStore;
435434
436 - private DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> conceptStore;
 435+ private DatabaseWikiWordConceptStore<T> conceptStore;
437436
438 - public DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> getConceptStore() throws PersistenceException {
 437+ public DatabaseWikiWordConceptStore<T> getConceptStore() throws PersistenceException {
439438 try {
440439 if (conceptStore==null) conceptStore = newConceptStore();
441440 return conceptStore;
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java
@@ -2,15 +2,14 @@
33
44 import java.util.Date;
55
6 -import de.brightbyte.data.cursor.DataSet;
76 import de.brightbyte.data.cursor.CursorProcessor;
 7+import de.brightbyte.data.cursor.DataSet;
88 import de.brightbyte.util.PersistenceException;
99 import de.brightbyte.wikiword.ConceptType;
1010 import de.brightbyte.wikiword.Corpus;
1111 import de.brightbyte.wikiword.ExtractionRule;
1212 import de.brightbyte.wikiword.ResourceType;
1313 import de.brightbyte.wikiword.model.LocalConcept;
14 -import de.brightbyte.wikiword.model.LocalConceptReference;
1514 import de.brightbyte.wikiword.schema.AliasScope;
1615
1716 /**
@@ -98,8 +97,8 @@
9998 public Corpus getCorpus();
10099
101100 public void resetTermsForUnknownConcepts() throws PersistenceException;
102 - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException;
103 - public int processUnknownConcepts(CursorProcessor<LocalConceptReference> processor) throws PersistenceException;
 101+ public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException;
 102+ public int processUnknownConcepts(CursorProcessor<LocalConcept> processor) throws PersistenceException;
104103
105104 public TextStoreBuilder getTextStoreBuilder() throws PersistenceException;
106105 public PropertyStoreBuilder getPropertyStoreBuilder() throws PersistenceException;

Status & tagging log