Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Atom.java |
— | — | @@ -0,0 +1,49 @@ |
| 2 | +package org.wikimedia.nlp.mdl; |
| 3 | + |
| 4 | +/** |
| 5 | + * atoms are POJO representing either unanalyzed Lexemes or Morphemes |
| 6 | + * @author oren |
| 7 | + * |
| 8 | + */ |
| 9 | +public class Atom implements Comparable<Atom> { |
| 10 | + |
| 11 | + /** does this atom have a morphological analysis */ |
| 12 | + private String langId="Unknown"; //iso code |
| 13 | + private boolean isAnalysed=false; |
| 14 | + private double analysisConfidence; |
| 15 | + private boolean isStem=false; |
| 16 | + private boolean isAffix=false; |
| 17 | + |
| 18 | + private String string; |
| 19 | + private Long frequency; |
| 20 | + |
| 21 | + |
| 22 | + private SortMode sortMode = SortMode.FRQ; |
| 23 | + |
| 24 | + public Atom(String string, Long frequency) { |
| 25 | + this.string=string; |
| 26 | + this.frequency=frequency; |
| 27 | + } |
| 28 | + |
| 29 | + public Atom(String string, Long frequency, String langId) { |
| 30 | + |
| 31 | + this.string=string; |
| 32 | + this.frequency=frequency; |
| 33 | + this.langId=langId; |
| 34 | + } |
| 35 | + |
| 36 | + public int compareTo(Atom o) { |
| 37 | + |
| 38 | + switch (sortMode){ |
| 39 | + |
| 40 | + case FRQ: |
| 41 | + return (int) (frequency-o.frequency); |
| 42 | + |
| 43 | + default: |
| 44 | + return string.compareTo(o.string); |
| 45 | + } |
| 46 | + |
| 47 | + |
| 48 | + } |
| 49 | + |
| 50 | +} |
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Atom.java |
___________________________________________________________________ |
Added: svn:keywords |
1 | 51 | + LastChangedDate LastChangedRevision LastChangedBy Id |
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Signature.java |
— | — | @@ -0,0 +1,38 @@ |
| 2 | +package org.wikimedia.nlp.mdl; |
| 3 | + |
| 4 | +import java.util.HashMap; |
| 5 | + |
| 6 | + |
| 7 | +public class Signature { |
| 8 | + |
| 9 | + private HashMap<Atom,String> signatures = new HashMap<Atom,String>(); |
| 10 | + |
| 11 | + public void addKey(Atom keySource, Atom keyTarget ){ |
| 12 | + signatures.put(keyTarget, signatures.get(keySource)); |
| 13 | + } |
| 14 | + |
| 15 | + public void addKeyValue(Atom lexeme, String suffixses){ |
| 16 | + signatures.put(lexeme, suffixses); |
| 17 | + } |
| 18 | + |
| 19 | + public void addKey(Atom lexeme){ |
| 20 | + signatures.put(lexeme,null); |
| 21 | + } |
| 22 | + |
| 23 | + public boolean hasKey(Atom key) { |
| 24 | + |
| 25 | + return signatures.containsKey(key); |
| 26 | + } |
| 27 | + |
| 28 | + |
| 29 | + public HashMap<Atom,String> getSignatures() { |
| 30 | + |
| 31 | + return new HashMap<Atom,String>(signatures); |
| 32 | + } |
| 33 | + |
| 34 | + public String getVal(Atom lexeme) { |
| 35 | + |
| 36 | + return signatures.get(lexeme); |
| 37 | + } |
| 38 | + |
| 39 | +} |
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Signature.java |
___________________________________________________________________ |
Added: svn:keywords |
1 | 40 | + LastChangedDate LastChangedRevision LastChangedBy Id |
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/MdlMorphology.java |
— | — | @@ -0,0 +1,32 @@ |
| 2 | +package org.wikimedia.nlp.mdl; |
| 3 | + |
| 4 | +import java.util.ArrayList; |
| 5 | +import java.util.HashMap; |
| 6 | +import java.util.List; |
| 7 | +import java.util.Map; |
| 8 | + |
| 9 | +public class MdlMorphology { |
| 10 | + |
| 11 | + /**new words*/ |
| 12 | + protected List<Atom> lexemesLst = new ArrayList<Atom>(); |
| 13 | + |
| 14 | + /** stems and roots*/ |
| 15 | + protected List<Atom> stemLst = new ArrayList<Atom>(); |
| 16 | + |
| 17 | + /** suffixes */ |
| 18 | + protected List<Atom> affixLst = new ArrayList<Atom>(); |
| 19 | + |
| 20 | + //stem to signature map |
| 21 | + Map<Atom,Signature> morphology= new HashMap<Atom,Signature>(); |
| 22 | + |
| 23 | + public void addLexeme(String text,Long frequency,String langId){ |
| 24 | + new Atom(text, frequency,langId); |
| 25 | + |
| 26 | + } |
| 27 | + |
| 28 | + public void addLexeme(String text){ |
| 29 | + addLexeme(text,1l,"unknown"); |
| 30 | + |
| 31 | + } |
| 32 | + |
| 33 | +} |
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/MdlMorphology.java |
___________________________________________________________________ |
Added: svn:keywords |
1 | 34 | + LastChangedDate LastChangedRevision LastChangedBy Id |
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/SortMode.java |
— | — | @@ -0,0 +1,7 @@ |
| 2 | +package org.wikimedia.nlp.mdl; |
| 3 | + |
/**
 * Sort criteria for atoms, consulted by {@code Atom.compareTo}.
 */
public enum SortMode {

    /** order atoms by frequency */
    FRQ,

    /** order atoms by their text */
    STR;

}
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/SortMode.java |
___________________________________________________________________ |
Added: svn:keywords |
1 | 9 | + LastChangedDate LastChangedRevision LastChangedBy Id |