r68771 MediaWiki - Code Review archive

Repository:MediaWiki
Revision: < r68770 | r68771 | r68772 >
Date:12:59, 30 June 2010
Author:daniel
Status:deferred
Tags:
Comment:
improved the way meanings are pruned before disambiguation
Modified paths:
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java (modified) (history)
  • /trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java (modified) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/CoherenceDisambiguator.java
@@ -238,8 +238,6 @@
239239 return getScore(r.getInterpretation(), context, similarities, features);
240240 }
241241
242 - pruneMeanings(meanings);
243 -
244242 sz = meanings.size();
245243 if (context!=null) sz += context.size();
246244 if (sz <2) {
@@ -267,26 +265,34 @@
268266 return getBestInterpretation(root, meanings, context, interpretations, similarities, features);
269267 }
270268
 269+ protected <X extends T>Map<X, List<? extends C>> getMeanings(Collection<X> terms) throws PersistenceException {
 270+ Map<X, List<? extends C>> meanings = super.getMeanings(terms);
 271+ pruneMeanings(meanings);
 272+ return meanings;
 273+ }
 274+
271275 protected void pruneMeanings(Map<? extends T, List<? extends C>> meanings) {
272 - if (minPopularity<=1) return; //nothing to do
273 -
274276 Iterator<?> eit = meanings.entrySet().iterator();
275277 while (eit.hasNext()) {
276278 Entry<T, List<? extends C>> e = (Entry<T, List<? extends C>>) eit.next(); //XXX: ugly cast. got confused about generics. ugh.
277279 List<? extends C> m = e.getValue();
278280 if (m==null) continue;
279281
280 - Iterator<? extends C> cit = m.iterator();
281 - while (cit.hasNext()) {
282 - C c = cit.next();
283 - double p = popularityMeasure.measure(c);
284 -
285 - if (p<minPopularity) {
286 - if (m.size()==1) {
287 - eit.remove();
288 - break;
289 - } else {
290 - cit.remove();
 282+ if (minPopularity>0) {
 283+ Iterator<? extends C> cit = m.iterator();
 284+ while (cit.hasNext()) {
 285+ C c = cit.next();
 286+ double p = popularityMeasure.measure(c);
 287+
 288+ if (p<minPopularity) {
 289+ trace("pruning unpopular meaning of "+e.getKey()+" (pop: "+p+" < "+minPopularity+"): "+c.getName());
 290+
 291+ if (m.size()==1) {
 292+ eit.remove();
 293+ break;
 294+ } else {
 295+ cit.remove();
 296+ }
291297 }
292298 }
293299 }
@@ -294,6 +300,9 @@
295301 if (m.size()==0) eit.remove();
296302 else if (m.size()>maxMeanings) {
297303 Collections.sort(m, popularityComparator);
 304+
 305+ trace("pruning least popular meanings of "+e.getKey()+" (keeping top "+maxMeanings+"): "+m.subList(maxMeanings, m.size()));
 306+
298307 m = m.subList(0, maxMeanings);
299308 e.setValue(m);
300309 }
@@ -321,6 +330,7 @@
322331 for (Disambiguator.Interpretation<X, C> interp: interpretations) {
323332 CoherenceDisambiguation<X, C> r = getScore(interp, context, similarities, features);
324333 double score = r.getScore();
 334+ //trace(" ~ score "+score+": "+r.getMeanings());
325335
326336 if ( ( best == null && score> 0 && !Double.isNaN(score))
327337 || (score > bestScore && !Double.isNaN(score)) ) {
@@ -398,16 +408,16 @@
399409
400410 for (TermReference t: interp.getSequence()) {
401411 C m = interp.getMeanings().get(t);
402 - ((HashMap<TermReference, C>)concepts).put(t, m);
 412+ ((Map<TermReference, C>)concepts).put(t, m);
403413 }
404414
405415 if (context != null) {
406416 for (C con: context) {
407 - if (con!=null)((HashMap<TermReference, C>)concepts).put(new Term(con.getName(), 1), con);
 417+ if (con!=null)((Map<TermReference, C>)concepts).put(new Term(con.getName(), 1), con);
408418 }
409419 }
410420 } else {
411 - concepts = (HashMap<TermReference, C>)interp.getMeanings();
 421+ concepts = (Map<TermReference, C>)interp.getMeanings();
412422 }
413423
414424 int c = concepts.size();
Index: trunk/WikiWord/WikiWord/src/main/java/de/brightbyte/wikiword/disambig/SlidingCoherenceDisambiguator.java
@@ -79,8 +79,6 @@
8080 return getScore(r.getInterpretation(), context, similarities, features);
8181 }
8282
83 - pruneMeanings(meanings);
84 -
8583 sz = meanings.size();
8684 if (context!=null) sz += context.size();
8785

Status & tagging log