Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java |
— | — | @@ -238,8 +238,6 @@ |
239 | 239 | return getScore(r.getInterpretation(), context, similarities, features); |
240 | 240 | } |
241 | 241 | |
242 | | - pruneMeanings(meanings); |
243 | | - |
244 | 242 | sz = meanings.size(); |
245 | 243 | if (context!=null) sz += context.size(); |
246 | 244 | if (sz <2) { |
— | — | @@ -267,26 +265,34 @@ |
268 | 266 | return getBestInterpretation(root, meanings, context, interpretations, similarities, features); |
269 | 267 | } |
270 | 268 | |
| 269 | + protected <X extends T>Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException { |
| 270 | + Map<X, List<? extends C>> meanings = super.getMeanings(terms); |
| 271 | + pruneMeanings(meanings); |
| 272 | + return meanings; |
| 273 | + } |
| 274 | + |
271 | 275 | protected void pruneMeanings(Map<? extends T, List<? extends C>> meanings) { |
272 | | - if (minPopularity<=1) return; //nothing to do |
273 | | - |
274 | 276 | Iterator<?> eit = meanings.entrySet().iterator(); |
275 | 277 | while (eit.hasNext()) { |
276 | 278 | Entry<T, List<? extends C>> e = (Entry<T, List<? extends C>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh. |
277 | 279 | List<? extends C> m = e.getValue(); |
278 | 280 | if (m==null) continue; |
279 | 281 | |
280 | | - Iterator<? extends C> cit = m.iterator(); |
281 | | - while (cit.hasNext()) { |
282 | | - C c = cit.next(); |
283 | | - double p = popularityMeasure.measure(c); |
284 | | - |
285 | | - if (p<minPopularity) { |
286 | | - if (m.size()==1) { |
287 | | - eit.remove(); |
288 | | - break; |
289 | | - } else { |
290 | | - cit.remove(); |
| 282 | + if (minPopularity>0) { |
| 283 | + Iterator<? extends C> cit = m.iterator(); |
| 284 | + while (cit.hasNext()) { |
| 285 | + C c = cit.next(); |
| 286 | + double p = popularityMeasure.measure(c); |
| 287 | + |
| 288 | + if (p<minPopularity) { |
| 289 | + trace("pruning unpopular meaning of "+e.getKey()+" (pop: "+p+" < "+minPopularity+"): "+c.getName()); |
| 290 | + |
| 291 | + if (m.size()==1) { |
| 292 | + eit.remove(); |
| 293 | + break; |
| 294 | + } else { |
| 295 | + cit.remove(); |
| 296 | + } |
291 | 297 | } |
292 | 298 | } |
293 | 299 | } |
— | — | @@ -294,6 +300,9 @@ |
295 | 301 | if (m.size()==0) eit.remove(); |
296 | 302 | else if (m.size()>maxMeanings) { |
297 | 303 | Collections.sort(m, popularityComparator); |
| 304 | + |
| 305 | + trace("pruning least popular meanings of "+e.getKey()+" (keeping top "+maxMeanings+"): "+m.subList(maxMeanings, m.size())); |
| 306 | + |
298 | 307 | m = m.subList(0, maxMeanings); |
299 | 308 | e.setValue(m); |
300 | 309 | } |
— | — | @@ -321,6 +330,7 @@ |
322 | 331 | for (Disambiguator.Interpretation<X, C> interp: interpretations) { |
323 | 332 | CoherenceDisambiguation<X, C> r = getScore(interp, context, similarities, features); |
324 | 333 | double score = r.getScore(); |
| 334 | + //trace(" ~ score "+score+": "+r.getMeanings()); |
325 | 335 | |
326 | 336 | if ( ( best == null && score> 0 && !Double.isNaN(score)) |
327 | 337 | || (score > bestScore && !Double.isNaN(score)) ) { |
— | — | @@ -398,16 +408,16 @@ |
399 | 409 | |
400 | 410 | for (TermReference t: interp.getSequence()) { |
401 | 411 | C m = interp.getMeanings().get(t); |
402 | | - ((HashMap<TermReference, C>)concepts).put(t, m); |
| 412 | + ((Map<TermReference, C>)concepts).put(t, m); |
403 | 413 | } |
404 | 414 | |
405 | 415 | if (context != null) { |
406 | 416 | for (C con: context) { |
407 | | - if (con!=null)((HashMap<TermReference, C>)concepts).put(new Term(con.getName(), 1), con); |
| 417 | + if (con!=null)((Map<TermReference, C>)concepts).put(new Term(con.getName(), 1), con); |
408 | 418 | } |
409 | 419 | } |
410 | 420 | } else { |
411 | | - concepts = (HashMap<TermReference, C>)interp.getMeanings(); |
| 421 | + concepts = (Map<TermReference, C>)interp.getMeanings(); |
412 | 422 | } |
413 | 423 | |
414 | 424 | int c = concepts.size(); |
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java |
— | — | @@ -79,8 +79,6 @@ |
80 | 80 | return getScore(r.getInterpretation(), context, similarities, features); |
81 | 81 | } |
82 | 82 | |
83 | | - pruneMeanings(meanings); |
84 | | - |
85 | 83 | sz = meanings.size(); |
86 | 84 | if (context!=null) sz += context.size(); |
87 | 85 | |