Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/CalculatedProximityStore.java |
— | — | @@ -42,7 +42,7 @@ |
43 | 43 | double prox = getProximity(centerFeatures, f.getFeatureVector()); |
44 | 44 | if (prox<minProximity) continue; |
45 | 45 | |
46 | | - return newConcept(f.getId(), f.getName(), 1, prox); |
| 46 | + return newConcept(f.getId(), f.getConcept().getName(), f.getConcept().getType(), 1, prox); |
47 | 47 | } ; |
48 | 48 | |
49 | 49 | return null; |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseLocalConceptStore.java |
— | — | @@ -31,9 +31,6 @@ |
32 | 32 | import de.brightbyte.wikiword.schema.LocalConceptStoreSchema; |
33 | 33 | import de.brightbyte.wikiword.schema.LocalStatisticsStoreSchema; |
34 | 34 | import de.brightbyte.wikiword.schema.StatisticsStoreSchema; |
35 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore; |
36 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseStatisticsStore; |
37 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore.ConceptFactory; |
38 | 35 | |
39 | 36 | /** |
40 | 37 | * A LocalConceptStore implemented based upon a {@link de.brightbyte.db.DatabaseSchema} object, |
— | — | @@ -295,7 +292,7 @@ |
296 | 293 | } |
297 | 294 | |
298 | 295 | @Override |
299 | | - protected LocalConcept newConcept(Map<String, Object> m) { |
| 296 | + protected LocalConcept newConcept(Map<String, Object> m) throws PersistenceException { |
300 | 297 | int id = asInt(m.get("cId")); |
301 | 298 | String name = asString(m.get("cName")); |
302 | 299 | ConceptType type = corpus.getConceptTypes().getType(asInt(m.get("cType"))); |
— | — | @@ -317,7 +314,7 @@ |
318 | 315 | LocalConcept[] outlinks = LocalConcept.parseList( asString(m.get("rOutlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry ); |
319 | 316 | LocalConcept[] broader = LocalConcept.parseList( asString(m.get("rBroader")), getConceptFactory(), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry ); |
320 | 317 | LocalConcept[] narrower = LocalConcept.parseList( asString(m.get("rNarrower")), getConceptFactory(), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry ); |
321 | | - TranslationReference[] langlinks = TranslationReference.parseList( asString(m.get("rLanglinks")), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry ); |
| 318 | + LocalConcept[] langlinks = LocalConcept.parseList( asString(m.get("rLanglinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry ); |
322 | 319 | LocalConcept[] similar = LocalConcept.parseList( asString(m.get("rSimilar")), getConceptFactory(), ((ConceptInfoStoreSchema)database).similarReferenceListEntry ); |
323 | 320 | LocalConcept[] related = LocalConcept.parseList( asString(m.get("rRelated")), getConceptFactory(), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry ); |
324 | 321 | TermReference[] terms = TermReference.parseList( asString(m.get("dTerms")), getConceptFactory(), ((ConceptInfoStoreSchema)database).termReferenceListEntry ); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/store/DatabaseGlobalConceptStore.java |
— | — | @@ -28,9 +28,6 @@ |
29 | 29 | import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema; |
30 | 30 | import de.brightbyte.wikiword.schema.GlobalConceptStoreSchema; |
31 | 31 | import de.brightbyte.wikiword.schema.StatisticsStoreSchema; |
32 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore; |
33 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseStatisticsStore; |
34 | | -import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore.DatabaseConceptInfoStore.ConceptFactory; |
35 | 32 | |
36 | 33 | /** |
37 | 34 | * A GlobalConceptStore implemented based upon a {@link de.brightbyte.db.DatabaseSchema} object, |
— | — | @@ -268,18 +265,22 @@ |
269 | 266 | |
270 | 267 | Corpus[] languages = ((GlobalConceptStoreSchema)DatabaseGlobalConceptStore.this.database).getLanguages(langBits); |
271 | 268 | |
272 | | - GlobalConcept ref = new GlobalConcept(id, name, cardinality, relevance); |
273 | | - GlobalConcept[] inlinks = GlobalConcept.parseList( asString(m.get("rInlinks")), ((ConceptInfoStoreSchema)database).inLinksReferenceListEntry ); |
274 | | - GlobalConcept[] outlinks = GlobalConcept.parseList( asString(m.get("rOutlinks")), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry ); |
275 | | - GlobalConcept[] broader = GlobalConcept.parseList( asString(m.get("rBroader")), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry ); |
276 | | - GlobalConcept[] narrower = GlobalConcept.parseList( asString(m.get("rNarrower")), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry ); |
277 | | - TranslationReference[] langlinks = TranslationReference.parseList( asString(m.get("rLanglinks")), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry ); |
278 | | - GlobalConcept[] similar = GlobalConcept.parseList( asString(m.get("rSimilar")), ((ConceptInfoStoreSchema)database).similarReferenceListEntry ); |
279 | | - GlobalConcept[] related = GlobalConcept.parseList( asString(m.get("rRelated")), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry ); |
| 269 | + GlobalConcept concept = new GlobalConcept(getDatasetIdentifier(), id, type); |
| 270 | + concept.setCardinality(cardinality); |
| 271 | + concept.setRelevance(relevance); |
280 | 272 | |
| 273 | + GlobalConcept[] inlinks = GlobalConcept.parseList( asString(m.get("rInlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).inLinksReferenceListEntry ); |
| 274 | + GlobalConcept[] outlinks = GlobalConcept.parseList( asString(m.get("rOutlinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).outLinksReferenceListEntry ); |
| 275 | + GlobalConcept[] broader = GlobalConcept.parseList( asString(m.get("rBroader")), getConceptFactory(), ((ConceptInfoStoreSchema)database).broaderReferenceListEntry ); |
| 276 | + GlobalConcept[] narrower = GlobalConcept.parseList( asString(m.get("rNarrower")), getConceptFactory(), ((ConceptInfoStoreSchema)database).narrowerReferenceListEntry ); |
| 277 | + GlobalConcept[] langlinks = GlobalConcept.parseList( asString(m.get("rLanglinks")), getConceptFactory(), ((ConceptInfoStoreSchema)database).langlinkReferenceListEntry ); |
| 278 | + GlobalConcept[] similar = GlobalConcept.parseList( asString(m.get("rSimilar")), getConceptFactory(), ((ConceptInfoStoreSchema)database).similarReferenceListEntry ); |
| 279 | + GlobalConcept[] related = GlobalConcept.parseList( asString(m.get("rRelated")), getConceptFactory(), ((ConceptInfoStoreSchema)database).relatedReferenceListEntry ); |
| 280 | + |
281 | 281 | ConceptRelations<GlobalConcept> relations = new ConceptRelations<GlobalConcept>(broader, narrower, inlinks, outlinks, similar, related, langlinks); |
| 282 | + concept.setRelations(relations); |
282 | 283 | |
283 | | - return new GlobalConcept(ref, getDatasetIdentifier(), languages, type, DatabaseGlobalConceptStore.this, relations, null); |
| 284 | + return concept; |
284 | 285 | } catch (SQLException e) { |
285 | 286 | throw new PersistenceException(e); |
286 | 287 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/schema/ConceptInfoStoreSchema.java |
— | — | @@ -13,22 +13,24 @@ |
14 | 14 | import de.brightbyte.wikiword.DatasetIdentifier; |
15 | 15 | import de.brightbyte.wikiword.TweakSet; |
16 | 16 | import de.brightbyte.wikiword.model.WikiWordConcept; |
| 17 | +import static de.brightbyte.wikiword.model.WikiWordConcept.*; |
17 | 18 | |
18 | 19 | public class ConceptInfoStoreSchema extends WikiWordStoreSchema { |
19 | 20 | public final String referenceSeparator; //info record separator |
20 | 21 | public final String referenceFieldSeparator; //info field separator |
| 22 | + public final String languagePrefixSeparator; //language prefix separator |
21 | 23 | |
22 | | - public final Pattern referenceSeparatorPattern; //info record separator |
23 | | - public final Pattern referenceFieldSeparatorPattern; //info field separator |
| 24 | + public final Pattern conceptSeparatorPattern; //info record separator |
| 25 | + public final Pattern conceptFieldSeparatorPattern; //info field separator |
| 26 | + public final Pattern languagePrefixPattern; //language prefix separator |
24 | 27 | |
25 | | - public class ReferenceListEntrySpec extends WikiWordConcept.ListFormatSpec { |
| 28 | + public class ConceptListEntrySpec extends WikiWordConcept.ListFormatSpec { |
26 | 29 | |
27 | 30 | public final String joinField; |
28 | 31 | public final String valueExpression; |
29 | 32 | |
30 | | - public ReferenceListEntrySpec(String field, String expression, boolean useId, boolean useName, boolean useCardinality, boolean useRelevance) { |
31 | | - super(referenceSeparatorPattern, referenceFieldSeparatorPattern, useId, useName, |
32 | | - useCardinality, useRelevance); |
| 33 | + public ConceptListEntrySpec(String field, String expression, int flags) { |
| 34 | + super(conceptSeparatorPattern, conceptFieldSeparatorPattern, languagePrefixPattern, flags); |
33 | 35 | |
34 | 36 | this.joinField = field; |
35 | 37 | this.valueExpression = expression; |
— | — | @@ -36,18 +38,18 @@ |
37 | 39 | |
38 | 40 | } |
39 | 41 | |
40 | | - public final ReferenceListEntrySpec langlinkReferenceListEntry; |
| 42 | + public final ConceptListEntrySpec langlinkReferenceListEntry; |
41 | 43 | |
42 | | - public final ReferenceListEntrySpec termReferenceListEntry; |
43 | | - public final ReferenceListEntrySpec broaderReferenceListEntry; |
44 | | - public final ReferenceListEntrySpec narrowerReferenceListEntry; |
45 | | - public final ReferenceListEntrySpec inLinksReferenceListEntry; |
46 | | - public final ReferenceListEntrySpec outLinksReferenceListEntry; |
47 | | - public final ReferenceListEntrySpec similarReferenceListEntry; |
48 | | - public final ReferenceListEntrySpec relatedReferenceListEntry; |
49 | | - public final ReferenceListEntrySpec related2ReferenceListEntry; |
50 | | - public final ReferenceListEntrySpec featureReferenceListEntry; |
51 | | - public final ReferenceListEntrySpec proximityReferenceListEntry; |
| 44 | + public final ConceptListEntrySpec termReferenceListEntry; |
| 45 | + public final ConceptListEntrySpec broaderReferenceListEntry; |
| 46 | + public final ConceptListEntrySpec narrowerReferenceListEntry; |
| 47 | + public final ConceptListEntrySpec inLinksReferenceListEntry; |
| 48 | + public final ConceptListEntrySpec outLinksReferenceListEntry; |
| 49 | + public final ConceptListEntrySpec similarReferenceListEntry; |
| 50 | + public final ConceptListEntrySpec relatedReferenceListEntry; |
| 51 | + public final ConceptListEntrySpec related2ReferenceListEntry; |
| 52 | + public final ConceptListEntrySpec featureReferenceListEntry; |
| 53 | + public final ConceptListEntrySpec proximityReferenceListEntry; |
52 | 54 | |
53 | 55 | protected EntityTable conceptInfoTable; |
54 | 56 | protected EntityTable conceptDescriptionTable; |
— | — | @@ -65,52 +67,54 @@ |
66 | 68 | |
67 | 69 | referenceSeparator = tweaks.getTweak("dbstore.cacheReferenceSeparator", "\u001E"); //ASCII Record Separator |
68 | 70 | referenceFieldSeparator = tweaks.getTweak("dbstore.cacheReferenceFieldSeparator", "\u001F"); //ASCII Field Separator |
69 | | - referenceSeparatorPattern = Pattern.compile(referenceSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0")); |
70 | | - referenceFieldSeparatorPattern = Pattern.compile(referenceFieldSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0")); |
| 71 | + languagePrefixSeparator = tweaks.getTweak("dbstore.languagePrefixSeparator", ":"); //language prefix separator (default ":"); NOTE(review): langlinkReferenceListEntry's SQL expression hard-codes ':' - keep the two in sync |
| 72 | + conceptSeparatorPattern = Pattern.compile(referenceSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0")); |
| 73 | + conceptFieldSeparatorPattern = Pattern.compile(referenceFieldSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0")); |
| 74 | + languagePrefixPattern = Pattern.compile(languagePrefixSeparator.replaceAll("[^$(){}\\[\\]\\\\]", "\\\\$0")); |
71 | 75 | |
72 | 76 | langlinkReferenceListEntry = |
73 | | - new ReferenceListEntrySpec("language, target", "concat(language, ':', target)", |
74 | | - false, true, false, false); |
| 77 | + new ConceptListEntrySpec("language, target", "concat(language, ':', target)", |
| 78 | + LIST_FORMAT_USE_NAME | LIST_FORMAT_USE_LANGUAGE_PREFIX ); |
75 | 79 | |
76 | 80 | termReferenceListEntry = |
77 | | - new ReferenceListEntrySpec("term_text", fields("term_text", "freq"), |
78 | | - false, true, true, false); |
| 81 | + new ConceptListEntrySpec("term_text", fields("term_text", "freq"), |
| 82 | + LIST_FORMAT_USE_NAME | LIST_FORMAT_USE_CARDINALITY ); |
79 | 83 | |
80 | 84 | broaderReferenceListEntry = |
81 | | - new ReferenceListEntrySpec("broad", cacheNames ? fields("broad", "broad_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("broad", "if (lhs is null or lhs = 0, 0, 1/lhs)"), |
82 | | - true, cacheNames, false, true); |
| 85 | + new ConceptListEntrySpec("broad", cacheNames ? fields("broad", "broad_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("broad", "if (lhs is null or lhs = 0, 0, 1/lhs)"), |
| 86 | + LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE ); |
83 | 87 | |
84 | 88 | narrowerReferenceListEntry = |
85 | | - new ReferenceListEntrySpec("narrow", cacheNames ? fields("narrow", "narrow_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("narrow", "if (lhs is null or lhs = 0, 0, 1/lhs)"), |
86 | | - true, cacheNames, false, true); |
| 89 | + new ConceptListEntrySpec("narrow", cacheNames ? fields("narrow", "narrow_name", "if (lhs is null or lhs = 0, 0, 1/lhs)") : fields("narrow", "if (lhs is null or lhs = 0, 0, 1/lhs)"), |
| 90 | + LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE); |
87 | 91 | |
88 | 92 | inLinksReferenceListEntry = |
89 | | - new ReferenceListEntrySpec("anchor", cacheNames ? fields("anchor", "anchor_name", "idf") : fields("anchor", "idf"), |
90 | | - true, cacheNames, false, true); |
| 93 | + new ConceptListEntrySpec("anchor", cacheNames ? fields("anchor", "anchor_name", "idf") : fields("anchor", "idf"), |
| 94 | + LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE ); |
91 | 95 | |
92 | 96 | outLinksReferenceListEntry = |
93 | | - new ReferenceListEntrySpec("target", cacheNames ? fields("target", "target_name", "idf") : fields("target", "idf"), |
94 | | - true, cacheNames, false, true); |
| 97 | + new ConceptListEntrySpec("target", cacheNames ? fields("target", "target_name", "idf") : fields("target", "idf"), |
| 98 | + LIST_FORMAT_USE_ID | (cacheNames ? LIST_FORMAT_USE_NAME : 0) | LIST_FORMAT_USE_RELEVANCE ); |
95 | 99 | |
96 | 100 | similarReferenceListEntry = |
97 | | - new ReferenceListEntrySpec("concept2", fields("concept2", "langmatch"), //TODO: frequency for similar from langref(!) |
98 | | - true, false, true, false); //TODO: name?... in relation table?... //XXX: why no score |
| 101 | + new ConceptListEntrySpec("concept2", fields("concept2", "langmatch"), //TODO: frequency for similar from langref(!) |
| 102 | + LIST_FORMAT_USE_ID | LIST_FORMAT_USE_CARDINALITY); //TODO: name?... in relation table?... //XXX: why no score |
99 | 103 | |
100 | 104 | relatedReferenceListEntry = |
101 | | - new ReferenceListEntrySpec("concept2", "concept2", |
102 | | - true, false, false, false); //TODO: name?... in relation table?... //XXX: why no score |
| 105 | + new ConceptListEntrySpec("concept2", "concept2", |
| 106 | + LIST_FORMAT_USE_ID); //TODO: name?... in relation table?... //XXX: why no score |
103 | 107 | |
104 | 108 | related2ReferenceListEntry = |
105 | | - new ReferenceListEntrySpec("concept1","concept1", |
106 | | - true, false, false, false); //TODO: name?... in relation table?... //XXX: why no score |
| 109 | + new ConceptListEntrySpec("concept1","concept1", |
| 110 | + LIST_FORMAT_USE_ID); //TODO: name?... in relation table?... //XXX: why no score |
107 | 111 | |
108 | 112 | featureReferenceListEntry = |
109 | | - new ReferenceListEntrySpec("target",fields("target", "weight"), |
110 | | - true, false, false, true); |
| 113 | + new ConceptListEntrySpec("target",fields("target", "weight"), |
| 114 | + LIST_FORMAT_USE_ID | LIST_FORMAT_USE_RELEVANCE ); |
111 | 115 | |
112 | 116 | proximityReferenceListEntry = |
113 | | - new ReferenceListEntrySpec("target",fields("target", "proximity"), |
114 | | - true, false, false, true); |
| 117 | + new ConceptListEntrySpec("target",fields("target", "proximity"), |
| 118 | + LIST_FORMAT_USE_ID | LIST_FORMAT_USE_RELEVANCE ); |
115 | 119 | |
116 | 120 | init(tweaks, description); |
117 | 121 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/LocalConcept.java |
— | — | @@ -1,52 +1,19 @@ |
2 | 2 | package de.brightbyte.wikiword.model; |
3 | 3 | |
4 | | -import java.text.Collator; |
5 | | -import java.util.Comparator; |
6 | | -import java.util.Locale; |
7 | | - |
8 | 4 | import de.brightbyte.wikiword.ConceptType; |
9 | 5 | import de.brightbyte.wikiword.Corpus; |
10 | 6 | |
11 | 7 | public class LocalConcept extends WikiWordConcept { |
12 | 8 | |
13 | | - public static class ByName implements Comparator<LocalConcept> { |
14 | | - protected Collator collator; |
15 | | - |
16 | | - public ByName() { |
17 | | - this((Collator)null); |
18 | | - } |
19 | | - |
20 | | - public ByName(Locale locale) { |
21 | | - this.collator = Collator.getInstance(locale); |
22 | | - } |
23 | | - |
24 | | - public ByName(Collator collator) { |
25 | | - this.collator = collator; |
26 | | - } |
27 | | - |
28 | | - public int compare(LocalConcept a, LocalConcept b) { |
29 | | - if (collator==null) return a.getName().compareTo(b.getName()); |
30 | | - else return collator.compare(a.getName(), b.getName()); |
31 | | - } |
32 | | - }; |
33 | | - |
34 | | - protected String name; |
35 | 9 | protected String definition; |
36 | 10 | protected String language; |
37 | 11 | protected WikiWordResource resource; |
38 | 12 | |
39 | 13 | public LocalConcept(Corpus corpus, int id, ConceptType type, String name) { |
40 | 14 | super(corpus, id, type); |
41 | | - |
42 | | - this.name = name; |
| 15 | + setName(name); |
43 | 16 | } |
44 | 17 | |
45 | | - @Override |
46 | | - public String toString() { |
47 | | - if (name!=null) return "#"+id+":[["+name+"]]"; |
48 | | - else return "#"+id; |
49 | | - } |
50 | | - |
51 | 18 | public Corpus getCorpus() { |
52 | 19 | return (Corpus)getDatasetIdentifier(); |
53 | 20 | } |
— | — | @@ -79,13 +46,4 @@ |
80 | 47 | this.language = language; |
81 | 48 | } |
82 | 49 | |
83 | | - public String getName() { |
84 | | - return name; |
85 | | - } |
86 | | - |
87 | | - public void setName(String name) { |
88 | | - if (this.name!=null) throw new IllegalStateException("property already initialized"); |
89 | | - this.name = name; |
90 | | - } |
91 | | - |
92 | 50 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/TermReference.java |
— | — | @@ -1,7 +1,11 @@ |
2 | 2 | package de.brightbyte.wikiword.model; |
3 | 3 | |
| 4 | +import de.brightbyte.util.PersistenceException; |
| 5 | +import de.brightbyte.wikiword.model.WikiWordConcept.Factory; |
| 6 | +import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ConceptListEntrySpec; |
4 | 7 | |
5 | 8 | |
| 9 | + |
6 | 10 | public class TermReference { |
7 | 11 | |
8 | 12 | private String term; |
— | — | @@ -67,4 +71,21 @@ |
68 | 72 | return true; |
69 | 73 | } |
70 | 74 | |
| 75 | + public static TermReference[] parseList(String s, Factory<LocalConcept> factory, ConceptListEntrySpec spec) throws PersistenceException { |
| 76 | + LocalConcept[] concepts = WikiWordConcept.parseList(s, factory, spec); //XXX: this is a terrible, terrible hack. |
| 77 | + TermReference[] terms = new TermReference[concepts.length]; |
| 78 | + |
| 79 | + for (int i=0; i<terms.length; i++) { |
| 80 | + WikiWordConcept dummy = concepts[i]; |
| 81 | + |
| 82 | + String term = dummy.getName(); //UGHA! |
| 83 | + double score = dummy.getCardinality(); |
| 84 | + |
| 85 | + WikiWordConcept target = factory.newInstance(dummy.getId(), null, dummy.getType()); |
| 86 | + terms[i] = new TermReference(term, target, score); |
| 87 | + } |
| 88 | + |
| 89 | + return terms; |
| 90 | + } |
| 91 | + |
71 | 92 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/WikiWordConcept.java |
— | — | @@ -1,7 +1,9 @@ |
2 | 2 | package de.brightbyte.wikiword.model; |
3 | 3 | |
| 4 | +import java.text.Collator; |
4 | 5 | import java.util.Arrays; |
5 | 6 | import java.util.Comparator; |
| 7 | +import java.util.Locale; |
6 | 8 | import java.util.regex.Pattern; |
7 | 9 | |
8 | 10 | import de.brightbyte.data.measure.Measure; |
— | — | @@ -10,24 +12,61 @@ |
11 | 13 | import de.brightbyte.wikiword.DatasetIdentifier; |
12 | 14 | |
13 | 15 | public abstract class WikiWordConcept { |
14 | | - protected DatasetIdentifier dataset; |
15 | | - protected ConceptType type; |
| 16 | + public static class ByName implements Comparator<WikiWordConcept> { |
| 17 | + protected Collator collator; |
| 18 | + |
| 19 | + public ByName() { |
| 20 | + this((Collator)null); |
| 21 | + } |
| 22 | + |
| 23 | + public ByName(Locale locale) { |
| 24 | + this.collator = Collator.getInstance(locale); |
| 25 | + } |
| 26 | + |
| 27 | + public ByName(Collator collator) { |
| 28 | + this.collator = collator; |
| 29 | + } |
| 30 | + |
| 31 | + public int compare(WikiWordConcept a, WikiWordConcept b) { |
| 32 | + if (collator==null) return a.getName().compareTo(b.getName()); |
| 33 | + else return collator.compare(a.getName(), b.getName()); |
| 34 | + } |
| 35 | + }; |
16 | 36 | |
17 | | - protected int id; |
| 37 | + private DatasetIdentifier dataset; |
| 38 | + private ConceptType type; |
| 39 | + |
| 40 | + private int id; |
| 41 | + private String name; |
18 | 42 | |
19 | | - protected int cardinality = 1; |
20 | | - protected double relevance = 1; |
| 43 | + private int cardinality = 1; |
| 44 | + private double relevance = 1; |
21 | 45 | |
22 | | - protected ConceptRelations relations; |
23 | | - protected ConceptFeatures features; |
24 | | - protected TermReference[] terms; |
| 46 | + private ConceptRelations relations; |
| 47 | + private ConceptFeatures features; |
| 48 | + private TermReference[] terms; |
25 | 49 | |
26 | 50 | public WikiWordConcept(DatasetIdentifier dataset, int id, ConceptType type) { |
27 | 51 | this.id = id; |
28 | 52 | this.dataset = dataset; |
29 | 53 | this.type = type; |
30 | 54 | } |
| 55 | + |
| 56 | + @Override |
| 57 | + public String toString() { |
| 58 | + if (name!=null) return "#"+id+":[["+name+"]]"; |
| 59 | + else return "#"+id; |
| 60 | + } |
| 61 | + |
| 62 | + public String getName() { |
| 63 | + return name; |
| 64 | + } |
31 | 65 | |
| 66 | + public void setName(String name) { |
| 67 | + if (this.name!=null) throw new IllegalStateException("property already initialized"); |
| 68 | + this.name = name; |
| 69 | + } |
| 70 | + |
32 | 71 | public DatasetIdentifier getDatasetIdentifier() { |
33 | 72 | return dataset; |
34 | 73 | } |
— | — | @@ -108,13 +147,7 @@ |
109 | 148 | |
110 | 149 | return id == other.id; |
111 | 150 | } |
112 | | - |
113 | 151 | |
114 | | - @Override |
115 | | - public String toString() { |
116 | | - return "#"+id; |
117 | | - } |
118 | | - |
119 | 152 | public static final Measure<WikiWordConcept> theCardinality = new Measure<WikiWordConcept>(){ |
120 | 153 | public double measure(WikiWordConcept r) { |
121 | 154 | return r.getCardinality(); |
— | — | @@ -144,30 +177,43 @@ |
145 | 178 | else return (int)(rb-ra) +1; |
146 | 179 | } |
147 | 180 | }; |
148 | | - |
| 181 | + |
| 182 | + public static final Comparator<WikiWordConcept> byName = new ByName(); |
| 183 | + |
149 | 184 | public static interface Factory<T extends WikiWordConcept> { |
150 | 185 | public T newInstance(int id, String name, ConceptType type) throws PersistenceException; |
151 | 186 | public T newInstance(int id, String name, ConceptType type, int card, double rel) throws PersistenceException; |
152 | 187 | public T[] newArray(int size); |
153 | 188 | } |
154 | 189 | |
| 190 | + public static final int LIST_FORMAT_USE_ID = 0x0001; //bit flags for ListFormatSpec: each one enables one field of a list entry |
| 191 | + public static final int LIST_FORMAT_USE_NAME = 0x0002; //was 0x0001, which collided with LIST_FORMAT_USE_ID and made useId/useName inseparable |
| 192 | + public static final int LIST_FORMAT_USE_CARDINALITY = 0x0010; |
| 193 | + public static final int LIST_FORMAT_USE_RELEVANCE = 0x0020; |
| 194 | + public static final int LIST_FORMAT_USE_LANGUAGE_PREFIX= 0x0100; //entry name carries a "lang<sep>name" prefix that parseList splits off |
| 195 | + |
155 | 196 | public static class ListFormatSpec { |
156 | 197 | public final boolean useId; |
157 | 198 | public final boolean useName; |
158 | 199 | public final boolean useCardinality; |
159 | 200 | public final boolean useRelevance; |
| 201 | + public final boolean useLanguagePrefix; |
160 | 202 | public final Pattern referenceSeparator; |
161 | 203 | public final Pattern fieldSeparator; |
| 204 | + public final Pattern prefixSeparator; |
162 | 205 | public final int width; |
163 | 206 | |
164 | | - public ListFormatSpec(Pattern referenceSeparator, Pattern fieldSeparator, boolean useId, boolean useName, boolean useCardinality, boolean useRelevance) { |
| 207 | + public ListFormatSpec(Pattern referenceSeparator, Pattern fieldSeparator, Pattern prefixSeparator, int flags) { |
165 | 208 | this.referenceSeparator = referenceSeparator; |
166 | 209 | this.fieldSeparator = fieldSeparator; |
167 | | - this.useId = useId; |
168 | | - this.useName = useName; |
169 | | - this.useCardinality = useCardinality; |
170 | | - this.useRelevance = useRelevance; |
| 210 | + this.prefixSeparator = prefixSeparator; |
171 | 211 | |
| 212 | + useId = (flags & LIST_FORMAT_USE_ID) > 0; |
| 213 | + useName = (flags & LIST_FORMAT_USE_NAME) > 0; |
| 214 | + useCardinality = (flags & LIST_FORMAT_USE_CARDINALITY) > 0; |
| 215 | + useRelevance = (flags & LIST_FORMAT_USE_RELEVANCE) > 0; |
| 216 | + useLanguagePrefix = (flags & LIST_FORMAT_USE_LANGUAGE_PREFIX) > 0; |
| 217 | + |
172 | 218 | int w = 0; |
173 | 219 | if (useId) w++; |
174 | 220 | if (useName) w++; |
— | — | @@ -208,8 +254,19 @@ |
209 | 255 | if (rr.length>=i+1 && spec.useName) name = rr[i++]; |
210 | 256 | if (rr.length>=i+1 && spec.useCardinality) cardinality = rr[i].length()==0 ? -1 : Integer.parseInt(rr[i++]); |
211 | 257 | if (rr.length>=i+1 && spec.useRelevance) relevance = rr[i].length()==0 ? -1 : Double.parseDouble(rr[i++]); |
| 258 | + String lang = null; |
212 | 259 | |
213 | | - a[c++] = factory.newInstance(id, name, null, cardinality, relevance); |
| 260 | + if (spec.useLanguagePrefix && name!=null) { //name is only assigned when the record actually carries a name field; Pattern.split(null, ..) would throw |
| 261 | + String[] nn = spec.prefixSeparator.split(name, 2); |
| 262 | + if (nn.length>1) { |
| 263 | + name = nn[1]; |
| 264 | + lang = nn[0]; |
| 265 | + } |
| 266 | + } |
| 267 | + |
| 268 | + T t = factory.newInstance(id, name, null, cardinality, relevance); |
| 269 | + if (lang!=null && t instanceof LocalConcept) ((LocalConcept)t).setLanguage(lang); |
| 270 | + a[c++] = t; |
214 | 271 | } |
215 | 272 | |
216 | 273 | if (c<a.length) { //should not happen, but might, if values get truncated |
— | — | @@ -220,7 +277,7 @@ |
221 | 278 | |
222 | 279 | if (spec.useRelevance) Arrays.sort(a, byRelevance); |
223 | 280 | else if (spec.useCardinality) Arrays.sort(a, byCardinality); |
224 | | - //else if (spec.useName) Arrays.sort(a, byName); |
| 281 | + else if (spec.useName) Arrays.sort(a, byName); |
225 | 282 | |
226 | 283 | return a; |
227 | 284 | } |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/model/ConceptRelations.java |
— | — | @@ -1,6 +1,5 @@ |
2 | 2 | package de.brightbyte.wikiword.model; |
3 | 3 | |
4 | | -import java.util.Map; |
5 | 4 | |
6 | 5 | public class ConceptRelations<R extends WikiWordConcept> { |
7 | 6 | |
— | — | @@ -10,7 +9,7 @@ |
11 | 10 | protected R[] narrower; |
12 | 11 | protected R[] similar; |
13 | 12 | protected R[] related; |
14 | | - protected Map<String, LocalConcept> langlinks; |
| 13 | + protected R[] langlinks; |
15 | 14 | |
16 | 15 | //TODO: inlinks, outlinks, coocc, co-coocc |
17 | 16 | |
— | — | @@ -20,7 +19,7 @@ |
21 | 20 | R[] outlinks, |
22 | 21 | R[] similar, |
23 | 22 | R[] related, |
24 | | - Map<String, LocalConcept> langlinks) { |
| 23 | + R[] langlinks) { |
25 | 24 | |
26 | 25 | if (inlinks==null) throw new NullPointerException(); |
27 | 26 | if (outlinks==null) throw new NullPointerException(); |
— | — | @@ -43,7 +42,7 @@ |
44 | 43 | return broader; |
45 | 44 | } |
46 | 45 | |
47 | | - public Map<String, LocalConcept> getLanglinks() { |
| 46 | + public R[] getLanglinks() { |
48 | 47 | return langlinks; |
49 | 48 | } |
50 | 49 | |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/LocalConceptSkosProperties.java |
— | — | @@ -49,7 +49,7 @@ |
50 | 50 | TermReference[] tt = concept.getTerms(); |
51 | 51 | for (TermReference t: tt) { |
52 | 52 | //if (t.getName().equals(name)) continue; //NOTE: for prefLabel |
53 | | - setLiteralProperty(about, skos.altLabel, t.getName(), lang); |
| 53 | + setLiteralProperty(about, skos.altLabel, t.getTerm(), lang); |
54 | 54 | } |
55 | 55 | |
56 | 56 | //TODO: idf, lhs |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/rdf/RdfOutput.java |
— | — | @@ -27,7 +27,7 @@ |
28 | 28 | |
29 | 29 | @SuppressWarnings("unchecked") |
30 | 30 | public RdfOutput(WikiWordIdentifiers identifiers, String platform, Writer writer, String format, DatasetIdentifier ds) throws RdfException, PersistenceException { |
31 | | - this(identifiers, (RdfPlatform<V, R, A, W>)RdfPlatforms.newPlatform(platform)); |
| 31 | + this(identifiers, (RdfPlatform<V, R, A, W>)(Object)RdfPlatforms.newPlatform(platform)); //XXX: ugly insane cast to avoid some compilers complaining. |
32 | 32 | init(this.platform.newWriter(writer, format), ds); |
33 | 33 | } |
34 | 34 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/WikiWordConceptStoreBuilder.java |
— | — | @@ -2,7 +2,6 @@ |
3 | 3 | |
4 | 4 | import de.brightbyte.util.PersistenceException; |
5 | 5 | import de.brightbyte.wikiword.model.WikiWordConcept; |
6 | | -import de.brightbyte.wikiword.model.WikiWordConceptReference; |
7 | 6 | import de.brightbyte.wikiword.store.WikiWordConceptStore; |
8 | 7 | import de.brightbyte.wikiword.store.WikiWordConceptStoreBase; |
9 | 8 | |
— | — | @@ -12,6 +11,6 @@ |
13 | 12 | public ConceptInfoStoreBuilder<T> getConceptInfoStoreBuilder() throws PersistenceException; |
14 | 13 | public ProximityStoreBuilder getProximityStoreBuilder() throws PersistenceException; |
15 | 14 | |
16 | | - public WikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> getConceptStore() throws PersistenceException; |
| 15 | + public WikiWordConceptStore<T> getConceptStore() throws PersistenceException; |
17 | 16 | |
18 | 17 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DebugLocalConceptStoreBuilder.java |
— | — | @@ -19,8 +19,6 @@ |
20 | 20 | import de.brightbyte.wikiword.ExtractionRule; |
21 | 21 | import de.brightbyte.wikiword.ResourceType; |
22 | 22 | import de.brightbyte.wikiword.model.LocalConcept; |
23 | | -import de.brightbyte.wikiword.model.LocalConceptReference; |
24 | | -import de.brightbyte.wikiword.model.WikiWordConceptReference; |
25 | 23 | import de.brightbyte.wikiword.schema.AliasScope; |
26 | 24 | import de.brightbyte.wikiword.store.GroupNameTranslator; |
27 | 25 | import de.brightbyte.wikiword.store.WikiWordConceptStore; |
— | — | @@ -834,11 +832,11 @@ |
835 | 833 | log("+ storeWarning: rcId="+rcId+", problem="+problem+", details="+details); |
836 | 834 | } |
837 | 835 | |
838 | | - public WikiWordConceptStore<LocalConcept, WikiWordConceptReference<LocalConcept>> getConceptStore() throws PersistenceException { |
| 836 | + public WikiWordConceptStore<LocalConcept> getConceptStore() throws PersistenceException { |
839 | 837 | return null; //XXX... |
840 | 838 | } |
841 | 839 | |
842 | | - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException { |
| 840 | + public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException { |
843 | 841 | return null; //XXX... |
844 | 842 | } |
845 | 843 | |
— | — | @@ -846,7 +844,7 @@ |
847 | 845 | //noop |
848 | 846 | } |
849 | 847 | |
850 | | - public int processUnknownConcepts(CursorProcessor<LocalConceptReference> processor) throws PersistenceException { |
| 848 | + public int processUnknownConcepts(CursorProcessor<LocalConcept> processor) throws PersistenceException { |
851 | 849 | //noop |
852 | 850 | return 0; |
853 | 851 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseLocalConceptStoreBuilder.java |
— | — | @@ -1,7 +1,5 @@ |
2 | 2 | package de.brightbyte.wikiword.store.builder; |
3 | 3 | |
4 | | -import static de.brightbyte.db.DatabaseUtil.asString; |
5 | | - |
6 | 4 | import java.io.File; |
7 | 5 | import java.sql.Connection; |
8 | 6 | import java.sql.ResultSet; |
— | — | @@ -38,7 +36,6 @@ |
39 | 37 | import de.brightbyte.wikiword.TweakSet; |
40 | 38 | import de.brightbyte.wikiword.builder.NameMaps; |
41 | 39 | import de.brightbyte.wikiword.model.LocalConcept; |
42 | | -import de.brightbyte.wikiword.model.LocalConceptReference; |
43 | 40 | import de.brightbyte.wikiword.schema.AliasScope; |
44 | 41 | import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema; |
45 | 42 | import de.brightbyte.wikiword.schema.LocalConceptStoreSchema; |
— | — | @@ -1453,16 +1450,18 @@ |
1454 | 1451 | return new DatabaseLocalConceptStore((LocalConceptStoreSchema)database, tweaks); |
1455 | 1452 | } |
1456 | 1453 | |
1457 | | - protected static DatabaseDataSet.Factory<LocalConceptReference> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConceptReference>() { |
1458 | | - public LocalConceptReference newInstance(ResultSet row) throws SQLException, PersistenceException { |
| 1454 | + protected DatabaseDataSet.Factory<LocalConcept> localConceptReferenceFactory = new DatabaseDataSet.Factory<LocalConcept>() { |
| 1455 | + public LocalConcept newInstance(ResultSet row) throws SQLException, PersistenceException { |
1459 | 1456 | int id = row.getInt("id"); |
1460 | | - String name = asString(row.getObject("name")); |
| 1457 | + String name = DatabaseUtil.asString(row.getObject("name")); |
| 1458 | + //FIXME: type?! |
1461 | 1459 | |
1462 | | - return new LocalConceptReference(id, name, -1, -1); |
| 1460 | + LocalConcept concept = new LocalConcept(getCorpus(), id, null, name); |
| 1461 | + return concept; |
1463 | 1462 | } |
1464 | 1463 | }; |
1465 | 1464 | |
1466 | | - public int processUnknownConcepts(final CursorProcessor<LocalConceptReference> processor) throws PersistenceException { |
| 1465 | + public int processUnknownConcepts(final CursorProcessor<LocalConcept> processor) throws PersistenceException { |
1467 | 1466 | String sql = "SELECT * FROM "+conceptTable.getSQLName(); |
1468 | 1467 | String where = "type = "+ConceptType.UNKNOWN.getCode(); |
1469 | 1468 | |
— | — | @@ -1483,7 +1482,7 @@ |
1484 | 1483 | return ds; |
1485 | 1484 | }*/ |
1486 | 1485 | |
1487 | | - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException { |
| 1486 | + public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException { |
1488 | 1487 | String sql = "SELECT * FROM "+conceptTable.getSQLName(); |
1489 | 1488 | String where = "type = "+ConceptType.UNKNOWN.getCode(); |
1490 | 1489 | |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseConceptInfoStoreBuilder.java |
— | — | @@ -12,7 +12,7 @@ |
13 | 13 | import de.brightbyte.wikiword.model.WikiWordConcept; |
14 | 14 | import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema; |
15 | 15 | import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema; |
16 | | -import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ReferenceListEntrySpec; |
| 16 | +import de.brightbyte.wikiword.schema.ConceptInfoStoreSchema.ConceptListEntrySpec; |
17 | 17 | |
18 | 18 | public abstract class DatabaseConceptInfoStoreBuilder<T extends WikiWordConcept> |
19 | 19 | extends DatabaseWikiWordStoreBuilder |
— | — | @@ -118,7 +118,7 @@ |
119 | 119 | protected int buildConceptPropertyCache( |
120 | 120 | final DatabaseTable cacheTable, final String cacheIdField, |
121 | 121 | final String propertyField, final String realtion, final String relConceptField, |
122 | | - final ReferenceListEntrySpec spec, final boolean append, final String threshold, |
| 122 | + final ConceptListEntrySpec spec, final boolean append, final String threshold, |
123 | 123 | final int chunkFactor) throws PersistenceException { |
124 | 124 | |
125 | 125 | final DatabaseTable relationTable = conceptStore.getDatabaseAccess().getTable(realtion); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseWikiWordConceptStoreBuilder.java |
— | — | @@ -24,7 +24,6 @@ |
25 | 25 | import de.brightbyte.wikiword.ConceptType; |
26 | 26 | import de.brightbyte.wikiword.TweakSet; |
27 | 27 | import de.brightbyte.wikiword.model.WikiWordConcept; |
28 | | -import de.brightbyte.wikiword.model.WikiWordConceptReference; |
29 | 28 | import de.brightbyte.wikiword.schema.WikiWordConceptStoreSchema; |
30 | 29 | import de.brightbyte.wikiword.store.DatabaseWikiWordConceptStore; |
31 | 30 | |
— | — | @@ -425,16 +424,16 @@ |
426 | 425 | |
427 | 426 | protected abstract DatabaseStatisticsStoreBuilder newStatisticsStoreBuilder() throws SQLException, PersistenceException; |
428 | 427 | protected abstract DatabaseConceptInfoStoreBuilder<T> newConceptInfoStoreBuilder() throws SQLException, PersistenceException; |
429 | | - protected abstract DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> newConceptStore() throws SQLException, PersistenceException; |
| 428 | + protected abstract DatabaseWikiWordConceptStore<T> newConceptStore() throws SQLException, PersistenceException; |
430 | 429 | protected abstract DatabaseProximityStoreBuilder newProximityStoreBuilder() throws SQLException; |
431 | 430 | |
432 | 431 | private DatabaseStatisticsStoreBuilder statsStore; |
433 | 432 | private DatabaseProximityStoreBuilder proximityStore; |
434 | 433 | private DatabaseConceptInfoStoreBuilder<T> infoStore; |
435 | 434 | |
436 | | - private DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> conceptStore; |
| 435 | + private DatabaseWikiWordConceptStore<T> conceptStore; |
437 | 436 | |
438 | | - public DatabaseWikiWordConceptStore<T, ? extends WikiWordConceptReference<T>> getConceptStore() throws PersistenceException { |
| 437 | + public DatabaseWikiWordConceptStore<T> getConceptStore() throws PersistenceException { |
439 | 438 | try { |
440 | 439 | if (conceptStore==null) conceptStore = newConceptStore(); |
441 | 440 | return conceptStore; |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/LocalConceptStoreBuilder.java |
— | — | @@ -2,15 +2,14 @@ |
3 | 3 | |
4 | 4 | import java.util.Date; |
5 | 5 | |
6 | | -import de.brightbyte.data.cursor.DataSet; |
7 | 6 | import de.brightbyte.data.cursor.CursorProcessor; |
| 7 | +import de.brightbyte.data.cursor.DataSet; |
8 | 8 | import de.brightbyte.util.PersistenceException; |
9 | 9 | import de.brightbyte.wikiword.ConceptType; |
10 | 10 | import de.brightbyte.wikiword.Corpus; |
11 | 11 | import de.brightbyte.wikiword.ExtractionRule; |
12 | 12 | import de.brightbyte.wikiword.ResourceType; |
13 | 13 | import de.brightbyte.wikiword.model.LocalConcept; |
14 | | -import de.brightbyte.wikiword.model.LocalConceptReference; |
15 | 14 | import de.brightbyte.wikiword.schema.AliasScope; |
16 | 15 | |
17 | 16 | /** |
— | — | @@ -98,8 +97,8 @@ |
99 | 98 | public Corpus getCorpus(); |
100 | 99 | |
101 | 100 | public void resetTermsForUnknownConcepts() throws PersistenceException; |
102 | | - public DataSet<LocalConceptReference> listUnknownConcepts() throws PersistenceException; |
103 | | - public int processUnknownConcepts(CursorProcessor<LocalConceptReference> processor) throws PersistenceException; |
| 101 | + public DataSet<LocalConcept> listUnknownConcepts() throws PersistenceException; |
| 102 | + public int processUnknownConcepts(CursorProcessor<LocalConcept> processor) throws PersistenceException; |
104 | 103 | |
105 | 104 | public TextStoreBuilder getTextStoreBuilder() throws PersistenceException; |
106 | 105 | public PropertyStoreBuilder getPropertyStoreBuilder() throws PersistenceException; |