r111345 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r111344‎ | r111345 | r111346 >
Date:00:26, 13 February 2012
Author:oren
Status:deferred
Tags:
Comment:
some MDL related classes
Modified paths:
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp (added) (history)
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl (added) (history)
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Atom.java (added) (history)
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/MdlMorphology.java (added) (history)
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Signature.java (added) (history)
  • /trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/SortMode.java (added) (history)

Diff [purge]

Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Atom.java
@@ -0,0 +1,49 @@
 2+package org.wikimedia.nlp.mdl;
 3+
 4+/**
 5+ * atoms are POJO representing either unanalyzed Lexemes or Morphemes
 6+ * @author oren
 7+ *
 8+ */
 9+public class Atom implements Comparable<Atom> {
 10+
 11+ /** does this atom have a morphological analysis */
 12+ private String langId="Unknown"; //iso code
 13+ private boolean isAnalysed=false;
 14+ private double analysisConfidence;
 15+ private boolean isStem=false;
 16+ private boolean isAffix=false;
 17+
 18+ private String string;
 19+ private Long frequency;
 20+
 21+
 22+ private SortMode sortMode = SortMode.FRQ;
 23+
 24+ public Atom(String string, Long frequency) {
 25+ this.string=string;
 26+ this.frequency=frequency;
 27+ }
 28+
 29+ public Atom(String string, Long frequency, String langId) {
 30+
 31+ this.string=string;
 32+ this.frequency=frequency;
 33+ this.langId=langId;
 34+ }
 35+
 36+ public int compareTo(Atom o) {
 37+
 38+ switch (sortMode){
 39+
 40+ case FRQ:
 41+ return (int) (frequency-o.frequency);
 42+
 43+ default:
 44+ return string.compareTo(o.string);
 45+ }
 46+
 47+
 48+ }
 49+
 50+}
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Atom.java
___________________________________________________________________
Added: svn:keywords
151 + LastChangedDate LastChangedRevision LastChangedBy Id
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Signature.java
@@ -0,0 +1,38 @@
 2+package org.wikimedia.nlp.mdl;
 3+
 4+import java.util.HashMap;
 5+
 6+
 7+public class Signature {
 8+
 9+ private HashMap<Atom,String> signatures = new HashMap<Atom,String>();
 10+
 11+ public void addKey(Atom keySource, Atom keyTarget ){
 12+ signatures.put(keyTarget, signatures.get(keySource));
 13+ }
 14+
 15+ public void addKeyValue(Atom lexeme, String suffixses){
 16+ signatures.put(lexeme, suffixses);
 17+ }
 18+
 19+ public void addKey(Atom lexeme){
 20+ signatures.put(lexeme,null);
 21+ }
 22+
 23+ public boolean hasKey(Atom key) {
 24+
 25+ return signatures.containsKey(key);
 26+ }
 27+
 28+
 29+ public HashMap<Atom,String> getSignatures() {
 30+
 31+ return new HashMap<Atom,String>(signatures);
 32+ }
 33+
 34+ public String getVal(Atom lexeme) {
 35+
 36+ return signatures.get(lexeme);
 37+ }
 38+
 39+}
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/Signature.java
___________________________________________________________________
Added: svn:keywords
140 + LastChangedDate LastChangedRevision LastChangedBy Id
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/MdlMorphology.java
@@ -0,0 +1,32 @@
 2+package org.wikimedia.nlp.mdl;
 3+
 4+import java.util.ArrayList;
 5+import java.util.HashMap;
 6+import java.util.List;
 7+import java.util.Map;
 8+
 9+public class MdlMorphology {
 10+
 11+ /**new words*/
 12+ protected List<Atom> lexemesLst = new ArrayList<Atom>();
 13+
 14+ /** stems and roots*/
 15+ protected List<Atom> stemLst = new ArrayList<Atom>();
 16+
 17+ /** suffixes */
 18+ protected List<Atom> affixLst = new ArrayList<Atom>();
 19+
 20+ //stem to signature map
 21+ Map<Atom,Signature> morphology= new HashMap<Atom,Signature>();
 22+
 23+ public void addLexeme(String text,Long frequency,String langId){
 24+ new Atom(text, frequency,langId);
 25+
 26+ }
 27+
 28+ public void addLexeme(String text){
 29+ addLexeme(text,1l,"unknown");
 30+
 31+ }
 32+
 33+}
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/MdlMorphology.java
___________________________________________________________________
Added: svn:keywords
134 + LastChangedDate LastChangedRevision LastChangedBy Id
Index: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/SortMode.java
@@ -0,0 +1,7 @@
 2+package org.wikimedia.nlp.mdl;
 3+
 4+public enum SortMode {
 5+ FRQ,
 6+ STR
 7+
 8+}
Property changes on: trunk/lucene-search-3/src/main/java/org/wikimedia/nlp/mdl/SortMode.java
___________________________________________________________________
Added: svn:keywords
19 + LastChangedDate LastChangedRevision LastChangedBy Id

Sign-offs

User | Flag | Date
Nikerabbit | inspected | 07:27, 13 February 2012

Status & tagging log