r54539 MediaWiki - Code Review archive

Repository:MediaWiki
Revision:r54538‎ | r54539 | r54540 >
Date:20:49, 6 August 2009
Author:daniel
Status:deferred
Tags:
Comment:
ReverseStringKeyAnalyzer for PatriciaTrie
Modified paths:
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/IdManagerBenchmark.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ReverseStringKeyAnalyzer.java (added) (history)
  • /trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/builder/ReverseStringKeyAnalyzerTest.java (added) (history)

Diff [purge]

Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/builder/ReverseStringKeyAnalyzerTest.java
@@ -0,0 +1,68 @@
 2+package de.brightbyte.wikiword.builder;
 3+
 4+import org.ardverk.collection.KeyAnalyzer;
 5+import org.ardverk.collection.PatriciaTrie;
 6+
 7+import junit.framework.TestCase;
 8+
 9+public class ReverseStringKeyAnalyzerTest extends TestCase {
 10+
 11+ public void testBitIndex() {
 12+ ReverseStringKeyAnalyzer analyzer = new ReverseStringKeyAnalyzer();
 13+
 14+ String key = "AAXB";
 15+ String other = "AAYB";
 16+ assertEquals(1*16+8+7,
 17+ analyzer.bitIndex(key, 0, 4*ReverseStringKeyAnalyzer.LENGTH,
 18+ other, 0, 4*ReverseStringKeyAnalyzer.LENGTH));
 19+
 20+ assertEquals(KeyAnalyzer.EQUAL_BIT_KEY,
 21+ analyzer.bitIndex(key, 0, 1*ReverseStringKeyAnalyzer.LENGTH,
 22+ other, 0, 1*ReverseStringKeyAnalyzer.LENGTH));
 23+
 24+ assertEquals(0+8+7,
 25+ analyzer.bitIndex(key, 1*ReverseStringKeyAnalyzer.LENGTH, 1*ReverseStringKeyAnalyzer.LENGTH,
 26+ other, 1*ReverseStringKeyAnalyzer.LENGTH, 1*ReverseStringKeyAnalyzer.LENGTH));
 27+
 28+ assertEquals(1*16+8+1,
 29+ analyzer.bitIndex(key, 0, 1*ReverseStringKeyAnalyzer.LENGTH,
 30+ other, 0, 2*ReverseStringKeyAnalyzer.LENGTH));
 31+
 32+ assertEquals(1*16+8+1,
 33+ analyzer.bitIndex(key, 0, 2*ReverseStringKeyAnalyzer.LENGTH,
 34+ other, 0, 1*ReverseStringKeyAnalyzer.LENGTH));
 35+
 36+ assertEquals(KeyAnalyzer.EQUAL_BIT_KEY,
 37+ analyzer.bitIndex(key, 2*ReverseStringKeyAnalyzer.LENGTH, 2*ReverseStringKeyAnalyzer.LENGTH,
 38+ other, 2*ReverseStringKeyAnalyzer.LENGTH, 2*ReverseStringKeyAnalyzer.LENGTH));
 39+
 40+ assertEquals(KeyAnalyzer.NULL_BIT_KEY,
 41+ analyzer.bitIndex(key, 0, 0,
 42+ other, 0, 0));
 43+
 44+ assertEquals(KeyAnalyzer.NULL_BIT_KEY,
 45+ analyzer.bitIndex("\0", 0, ReverseStringKeyAnalyzer.LENGTH,
 46+ "\0", 0, ReverseStringKeyAnalyzer.LENGTH));
 47+ }
 48+
 49+ public void testPatriciaTrie() {
 50+ ReverseStringKeyAnalyzer analyzer = new ReverseStringKeyAnalyzer();
 51+ PatriciaTrie<String, Integer> trie = new PatriciaTrie<String, Integer>(analyzer);
 52+
 53+ trie.put("ABC", 1);
 54+ trie.put("XBC", 2);
 55+ trie.put("ABD", 3);
 56+ trie.put("XBC", 4);
 57+
 58+ assertEquals(new Integer(1), trie.get("ABC"));
 59+ assertEquals(new Integer(3), trie.get("ABD"));
 60+ assertEquals(new Integer(4), trie.get("XBC"));
 61+
 62+ trie.put("", 0);
 63+ assertEquals(new Integer(0), trie.get(""));
 64+
 65+ trie.put("\0", 10);
 66+ assertEquals(new Integer(10), trie.get("\0"));
 67+ }
 68+
 69+}
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/IdManagerBenchmark.java
@@ -0,0 +1,68 @@
 2+package de.brightbyte.wikiword.builder;
 3+
 4+import java.io.BufferedReader;
 5+import java.io.File;
 6+import java.io.FileInputStream;
 7+import java.io.IOException;
 8+import java.io.InputStream;
 9+import java.io.InputStreamReader;
 10+import java.io.Reader;
 11+import java.util.HashMap;
 12+import java.util.Map;
 13+
 14+import org.ardverk.collection.PatriciaTrie;
 15+import org.ardverk.collection.StringKeyAnalyzer;
 16+
 17+import de.brightbyte.audit.DebugUtil;
 18+
 19+public class IdManagerBenchmark {
 20+
 21+ protected static void load(File store, String encoding, Map<String, Integer> ids) throws IOException {
 22+ InputStream f = new FileInputStream(store);
 23+ Reader rd = encoding!=null ? new InputStreamReader(f, encoding) : new InputStreamReader(f);
 24+ BufferedReader in = new BufferedReader(rd);
 25+
 26+ String s;
 27+ while ((s = in.readLine()) != null) {
 28+ int idx = s.indexOf('\t');
 29+ if (idx<0) {
 30+ break; //FIXME: remove broken record before appending!
 31+ }
 32+
 33+ try {
 34+ String n = s.substring(0, idx);
 35+ int i = Integer.parseInt(s.substring(idx+1));
 36+
 37+ Integer old = ids.put(n, i);
 38+ if (old != null && !old.equals(i)) throw new RuntimeException("multiple entries for key "+n+": was "+old+", found "+i);
 39+ } catch (NumberFormatException e) {
 40+ break; //FIXME: remove broken record before appending!
 41+ }
 42+ }
 43+
 44+ in.close();
 45+ }
 46+
 47+ public static void main(String[] args) throws IOException {
 48+ String mode = args[0];
 49+ String file = args[1];
 50+
 51+ String encoding = "UTF-8";
 52+
 53+ Map<String, Integer> map;
 54+
 55+ if (mode.equals("hash")) map = new HashMap<String, Integer>();
 56+ else if (mode.equals("trie")) map = new PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE);
 57+ else if (mode.equals("rtrie")) map = new PatriciaTrie<String, Integer>(ReverseStringKeyAnalyzer.INSTANCE);
 58+ else throw new IllegalArgumentException("unknown mode: "+mode);
 59+
 60+ Runtime.getRuntime().gc();
 61+ System.out.println("Memory used before:"+ DebugUtil.memory());
 62+ System.out.println("loading...");
 63+ load(new File(file), encoding, map);
 64+ System.out.println("loaded "+map.size()+" entries");
 65+ System.out.println("GC...");
 66+ Runtime.getRuntime().gc();
 67+ System.out.println("Memory used after:"+ DebugUtil.memory());
 68+ }
 69+}
Property changes on: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/IdManagerBenchmark.java
___________________________________________________________________
Name: svn:mergeinfo
170 +
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/ReverseStringKeyAnalyzer.java
@@ -0,0 +1,100 @@
 2+package de.brightbyte.wikiword.builder;
 3+
 4+import org.ardverk.collection.KeyAnalyzer;
 5+import org.ardverk.collection.StringKeyAnalyzer;
 6+
 7+public class ReverseStringKeyAnalyzer extends StringKeyAnalyzer {
 8+
 9+ private static final long serialVersionUID = -3056514701732470312L;
 10+
 11+ private static final int MSB = 0x8000;
 12+
 13+ public static final ReverseStringKeyAnalyzer INSTANCE = new ReverseStringKeyAnalyzer();
 14+
 15+ public ReverseStringKeyAnalyzer() {
 16+ super();
 17+ }
 18+
 19+ @Override
 20+ public int bitIndex(String key, int offsetInBits, int lengthInBits, String other, int otherOffsetInBits, int otherLengthInBits) {
 21+ boolean allNull = true;
 22+
 23+ if (offsetInBits % LENGTH != 0 || otherOffsetInBits % LENGTH != 0
 24+ || lengthInBits % LENGTH != 0 || otherLengthInBits % LENGTH != 0) {
 25+ throw new IllegalArgumentException("offsets & lengths must be at character boundaries");
 26+ }
 27+
 28+ int length1 = key.length();
 29+ int length2 = other ==null ? otherLengthInBits / LENGTH : other.length();
 30+
 31+ int beginIndex1 = length1 -1 - (offsetInBits / LENGTH);
 32+ int beginIndex2 = length2 -1 - (otherOffsetInBits / LENGTH);
 33+
 34+ int endIndex1 = beginIndex1 - (lengthInBits / LENGTH);
 35+ int endIndex2 = beginIndex2 - (otherLengthInBits / LENGTH);
 36+
 37+ int length = Math.max(lengthInBits/ LENGTH, otherLengthInBits/ LENGTH);
 38+
 39+ // Look at each character, and if they're different
 40+ // then figure out which bit makes the difference
 41+ // and return it.
 42+ char k = 0, f = 0;
 43+ for(int i = 0; i < length; i++) {
 44+ int index1 = beginIndex1 - i;
 45+ int index2 = beginIndex2 - i;
 46+
 47+ if (index1 <= endIndex1) {
 48+ k = 0;
 49+ } else {
 50+ k = key.charAt(index1);
 51+ }
 52+
 53+ if (other == null || index2 <= endIndex2) {
 54+ f = 0;
 55+ } else {
 56+ f = other.charAt(index2);
 57+ }
 58+
 59+ if (k != f) {
 60+ int x = k ^ f;
 61+ return i * LENGTH + (Integer.numberOfLeadingZeros(x) - LENGTH);
 62+ }
 63+
 64+ if (k != 0) {
 65+ allNull = false;
 66+ }
 67+ }
 68+
 69+ if (allNull) {
 70+ return KeyAnalyzer.NULL_BIT_KEY;
 71+ }
 72+
 73+ return KeyAnalyzer.EQUAL_BIT_KEY;
 74+ }
 75+
 76+ @Override
 77+ public boolean isBitSet(String key, int bitIndex, int lengthInBits) {
 78+ if (key == null || bitIndex >= lengthInBits) {
 79+ return false;
 80+ }
 81+
 82+ int index = key.length() - (int)(bitIndex / LENGTH) -1;
 83+ int bit = (int)(bitIndex % LENGTH);
 84+
 85+ return (key.charAt(index) & (MSB >>> bit)) != 0;
 86+ }
 87+
 88+ @Override
 89+ public boolean isPrefix(String prefix, int offsetInBits, int lengthInBits, String key) {
 90+ if (offsetInBits % LENGTH != 0 || lengthInBits % LENGTH != 0) {
 91+ throw new IllegalArgumentException(
 92+ "Cannot determine prefix outside of Character boundaries");
 93+ }
 94+
 95+ int ofs = prefix.length() - ((offsetInBits + lengthInBits) / LENGTH);
 96+
 97+ String s1 = prefix.substring(ofs, lengthInBits / LENGTH);
 98+ return key.endsWith(s1);
 99+ }
 100+
 101+}

Status & tagging log