Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/DatabaseProximityStoreBuilder.java |
— | — | @@ -20,7 +20,7 @@ |
21 | 21 | extends DatabaseWikiWordStoreBuilder |
22 | 22 | implements ProximityStoreBuilder { |
23 | 23 | |
24 | | - protected FeatureVectorFactors featureVectorFactors = new FeatureVectorFactors(); |
| 24 | + protected FeatureVectorFactors featureVectorFactors; |
25 | 25 | |
26 | 26 | protected WikiWordConceptStoreSchema conceptDatabase; |
27 | 27 | |
— | — | @@ -35,6 +35,7 @@ |
36 | 36 | super(database, tweaks, agenda); |
37 | 37 | |
38 | 38 | this.conceptStore = conceptStore; |
| 39 | + this.featureVectorFactors = new FeatureVectorFactors(tweaks); |
39 | 40 | |
40 | 41 | Inserter featureInserter = configureTable("feature", 8*1024, 32); |
41 | 42 | featureTable = (RelationTable)featureInserter.getTable(); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/store/builder/ProximityStoreBuilder.java |
— | — | @@ -1,6 +1,7 @@ |
2 | 2 | package de.brightbyte.wikiword.store.builder; |
3 | 3 | |
4 | 4 | import de.brightbyte.util.PersistenceException; |
| 5 | +import de.brightbyte.wikiword.TweakSet; |
5 | 6 | |
6 | 7 | public interface ProximityStoreBuilder extends WikiWordStoreBuilder { |
7 | 8 | |
— | — | @@ -10,20 +11,37 @@ |
11 | 12 | //NOTE: since there are usually more links than categories, there's a bias in favor of categories! |
12 | 13 | // number of links grows with article length, number of categories does not! |
13 | 14 | |
14 | | - public final double selfWeight = 4; |
15 | | - //public final double weightOffset = 1; |
| 15 | + public final double selfWeight; |
| 16 | + //public final double weightOffset; |
16 | 17 | |
17 | | - public final double downWeight = 0.2; //having common children is not very relevant; also, categorization is favored by systemic bias, so tone it down. |
18 | | - public final double downBiasCoef = 1; //if the parent has many children should be considered |
| 18 | + public final double downWeight; |
| 19 | + public final double downBiasCoef; |
19 | 20 | |
20 | | - public final double upWeight = 1.2; //having common parents is interesting; note: categorization is favored by systemic bias, but the bias is tuned out here anyway. |
21 | | - public final double upBiasCoef = 0.1; //if a child has many parents doesn't matter |
| 21 | + public final double upWeight; |
| 22 | + public final double upBiasCoef; |
22 | 23 | |
23 | | - public final double inWeight = 1.5; //bein referenced from the same place is a strong factor |
24 | | - public final double inBiasCoef = 1; //if the concept is referenced a lot, co-reference becvomes less relevant |
| 24 | + public final double inWeight; |
| 25 | + public final double inBiasCoef; |
25 | 26 | |
26 | | - public final double outWeight = 1.0; //referencing the same thing is a good indicator |
27 | | - public final double outBiasCoef = 0.2; //if the concept has many outgoing links doesn't matter much |
| 27 | + public final double outWeight; |
| 28 | + public final double outBiasCoef; |
| 29 | + |
| 30 | + public FeatureVectorFactors(TweakSet tweaks) { |
| 31 | + selfWeight = tweaks.getTweak("proximity.selfWeight", 2); |
| 32 | + //weightOffset = 1; |
| 33 | + |
| 34 | + downWeight = tweaks.getTweak("proximity.downWeight", 0.2); //having common children is not very relevant; also, categorization is favored by systemic bias, so tone it down. |
| 35 | + downBiasCoef = tweaks.getTweak("proximity.downBiasCoef", 1); //if the parent has many children should be considered |
| 36 | + |
| 37 | + upWeight = tweaks.getTweak("proximity.upWeight", 1.2); //having common parents is interesting; note: categorization is favored by systemic bias, but the bias is tuned out here anyway. |
| 38 | + upBiasCoef = tweaks.getTweak("proximity.upBiasCoef", 0.1); //if a child has many parents doesn't matter |
| 39 | + |
| 40 | + inWeight = tweaks.getTweak("proximity.inWeight", 1.5); //being referenced from the same place is a strong factor |
| 41 | + inBiasCoef = tweaks.getTweak("proximity.inBiasCoef", 1); //if the concept is referenced a lot, co-reference becomes less relevant |
| 42 | + |
| 43 | + outWeight = tweaks.getTweak("proximity.outWeight", 1.0); //referencing the same thing is a good indicator |
| 44 | + outBiasCoef = tweaks.getTweak("proximity.outBiasCoef", 0.2); //if the concept has many outgoing links doesn't matter much |
| 45 | + } |
28 | 46 | } |
29 | 47 | |
30 | 48 | public void buildFeatures() throws PersistenceException; |