Index: trunk/WikiWord/WikiWordBuilder/src/test/java/de/brightbyte/wikiword/store/builder/NameStoreBenchmark.java |
— | — | @@ -1,87 +1,26 @@ |
2 | 2 | package de.brightbyte.wikiword.store.builder; |
3 | 3 | |
4 | 4 | import java.io.BufferedReader; |
5 | | -import java.io.File; |
6 | 5 | import java.io.FileInputStream; |
7 | 6 | import java.io.IOException; |
8 | 7 | import java.io.InputStreamReader; |
9 | 8 | import java.security.NoSuchAlgorithmException; |
10 | | -import java.util.Arrays; |
11 | | -import java.util.HashMap; |
12 | | -import java.util.HashSet; |
13 | | -import java.util.Set; |
14 | 9 | |
15 | | -import de.brightbyte.data.BlockDigest; |
16 | | -import de.brightbyte.data.ByteString; |
17 | | -import de.brightbyte.data.Codec; |
18 | | -import de.brightbyte.data.Functor; |
19 | | -import de.brightbyte.data.KeyDigestingValueStore; |
20 | 10 | import de.brightbyte.data.KeyValueStore; |
21 | | -import de.brightbyte.data.MapLookup; |
22 | | -import de.brightbyte.data.XorFold32; |
23 | | -import de.brightbyte.data.XorFold64; |
24 | | -import de.brightbyte.data.XorWrap; |
25 | | -import de.brightbyte.io.HuffmanDataCodec; |
26 | | -import de.brightbyte.text.CharsetCodec; |
27 | 11 | import de.brightbyte.util.PersistenceException; |
| 12 | +import de.brightbyte.wikiword.builder.NameMaps; |
28 | 13 | |
29 | 14 | public class NameStoreBenchmark { |
30 | 15 | public static void main(String[] args) throws IOException, PersistenceException, NoSuchAlgorithmException, InterruptedException { |
31 | | - String type = args[0]; |
| 16 | + String params = args[0]; |
32 | 17 | int limit = Integer.parseInt(args[1]); |
33 | 18 | |
34 | | - KeyValueStore<String, Integer> store = null; |
| 19 | + KeyValueStore<String, Integer> store = NameMaps.newStore(params, "en"); |
35 | 20 | |
36 | | - String[] tt = type.split("[,;|+/]"); |
37 | | - |
38 | | - Set<String> params = new HashSet<String>(); |
39 | | - params.addAll(Arrays.asList(tt)); |
40 | | - |
41 | | - if (params.contains("none") || params.contains("null")) store = null; |
42 | | - else if (params.contains("string")) store = new MapLookup<String, Integer>(new HashMap<String, Integer>()); |
43 | | - else if (params.contains("utf8") || params.contains("utf16")) { |
44 | | - //initial digest turns string into UTF-8 bytes |
45 | | - Functor<byte[], String> digest; |
46 | | - |
47 | | - if (params.contains("utf8")) digest = new Codec.Encoder<String, byte[]>(new CharsetCodec("UTF-8")); |
48 | | - else digest = new Codec.Encoder<String, byte[]>(new CharsetCodec("UTF-16")); |
49 | | - |
50 | | - //apply md5 digest or huffman compression |
51 | | - if (params.contains("md5")) digest = new Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("MD5")); |
52 | | - else if (params.contains("sha1")) digest = new Functor.Composite<byte[], byte[], String>(digest, new BlockDigest("SHA-1")); |
53 | | - else if (params.contains("huff") || params.contains("huffman")) digest = new Functor.Composite<byte[], byte[], String>(digest, getHuffmanEncoder(args[3])); |
54 | | - |
55 | | - if (params.contains("fold64") || params.contains("fold32")) { //fold into Long |
56 | | - Functor<? extends Number, byte[]> fold; |
57 | | - |
58 | | - if (params.contains("fold32")) fold = XorFold32.instance; |
59 | | - else fold = XorFold64.instance; |
60 | | - |
61 | | - Functor<Number, String> convert = new Functor.Composite<Number, byte[], String>(digest, fold); |
62 | | - |
63 | | - MapLookup<Number, Integer> numStore = new MapLookup<Number, Integer>(new HashMap<Number, Integer>()); |
64 | | - store = new KeyDigestingValueStore<String, Number, Integer>(numStore, convert); |
65 | | - } else { //keep bytes, wrap in ByteArray |
66 | | - if (params.contains("wrap8")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8)); |
67 | | - else if (params.contains("wrap6")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(6)); |
68 | | - else if (params.contains("wrap4")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4)); |
69 | | - else if (params.contains("wrap4")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(4)); |
70 | | - |
71 | | - //create converter that includes wrapping the byte array in a ByteString |
72 | | - Functor<ByteString, String> convert = new Functor.Composite<ByteString, byte[], String>(digest, ByteString.wrap); |
73 | | - |
74 | | - //set up the store |
75 | | - MapLookup<ByteString, Integer> byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString, Integer>()); |
76 | | - store = new KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert); |
77 | | - } |
78 | | - } else { |
79 | | - throw new IllegalArgumentException("bad store type: "+type+"; expected 'none' or 'string' or 'utf8' as part of the type spec"); |
80 | | - } |
81 | | - |
82 | 21 | BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(args[2]), "UTF-8")); |
83 | 22 | |
84 | 23 | Runtime.getRuntime().gc(); |
85 | | - Thread.currentThread().sleep(1000); |
| 24 | + Thread.sleep(1000); |
86 | 25 | long baseline = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); |
87 | 26 | |
88 | 27 | long start = System.nanoTime(); |
— | — | @@ -101,17 +40,11 @@ |
102 | 41 | System.out.format("Processed %d entries in %01.3f sec\n", c, t/1000000000.0); |
103 | 42 | |
104 | 43 | Runtime.getRuntime().gc(); |
105 | | - Thread.currentThread().sleep(1000); |
| 44 | + Thread.sleep(1000); |
106 | 45 | long m = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); |
107 | 46 | |
108 | 47 | System.out.format("Memoray used: %01.2f MB\n", (m - baseline)/(1024.0*1024.0)); |
109 | 48 | |
110 | 49 | if (store!=null) store.close(); |
111 | 50 | } |
112 | | - |
113 | | - private static Functor<byte[], byte[]> getHuffmanEncoder(String dictFile) throws IOException { |
114 | | - HuffmanDataCodec codec = new HuffmanDataCodec(); |
115 | | - codec.buildDictionary(new File(dictFile), 0); |
116 | | - return new Codec.Encoder<byte[], byte[]>(codec); |
117 | | - } |
118 | 51 | } |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/TitleSetFilter.java |
— | — | @@ -4,15 +4,12 @@ |
5 | 5 | import java.io.FileInputStream; |
6 | 6 | import java.io.IOException; |
7 | 7 | import java.io.InputStream; |
8 | | -import java.util.Map; |
9 | | -import java.util.Set; |
10 | 8 | |
11 | 9 | import de.brightbyte.data.KeyValueLookup; |
12 | 10 | import de.brightbyte.data.KeyValueStore; |
13 | 11 | import de.brightbyte.data.Lookup; |
14 | 12 | import de.brightbyte.data.cursor.DataCursor; |
15 | 13 | import de.brightbyte.data.filter.LookupFilter; |
16 | | -import de.brightbyte.data.filter.StaticSetFilter; |
17 | 14 | import de.brightbyte.io.LineCursor; |
18 | 15 | import de.brightbyte.util.PersistenceException; |
19 | 16 | import de.brightbyte.wikiword.builder.NameMaps; |
— | — | @@ -21,7 +18,7 @@ |
22 | 19 | protected final static Integer ONE = new Integer(1); |
23 | 20 | |
24 | 21 | protected static Lookup<String, Integer> slurpCursor(DataCursor<String> titleCursor) throws PersistenceException { |
25 | | - KeyValueStore<String, Integer> store = NameMaps.<Integer>newStore("string", "en"); //XXX: language... |
| 22 | + KeyValueStore<String, Integer> store = NameMaps.newStore("string", "en"); //XXX: language... |
26 | 23 | |
27 | 24 | String s; |
28 | 25 | while ((s = titleCursor.next()) != null) { |
— | — | @@ -50,7 +47,7 @@ |
51 | 48 | |
52 | 49 | @SuppressWarnings("unchecked") |
53 | 50 | public TitleSetFilter(String name, Lookup<String, Integer> titles) { |
54 | | - super(name, new LookupFilter<CharSequence, Integer>(titles, ONE)); |
| 51 | + super(name, new LookupFilter<String, Integer>(titles, ONE)); |
55 | 52 | } |
56 | 53 | |
57 | 54 | public TitleSetFilter(File titleFile, String enc) throws PersistenceException { |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/processor/PageTitleFilter.java |
— | — | @@ -4,10 +4,10 @@ |
5 | 5 | import de.brightbyte.wikiword.analyzer.WikiPage; |
6 | 6 | |
7 | 7 | public class PageTitleFilter implements WikiPageFilter { |
8 | | - protected Filter<CharSequence> filter; |
| 8 | + protected Filter<String> filter; |
9 | 9 | private String name; |
10 | 10 | |
11 | | - public PageTitleFilter(String name, Filter<CharSequence> filter) { |
| 11 | + public PageTitleFilter(String name, Filter<String> filter) { |
12 | 12 | if (filter==null) throw new NullPointerException(); |
13 | 13 | this.filter = filter; |
14 | 14 | this.name = name; |
— | — | @@ -15,7 +15,7 @@ |
16 | 16 | |
17 | 17 | public boolean matches(WikiPage page) { |
18 | 18 | CharSequence t = page.getResourceName(); |
19 | | - return filter.matches(t); |
| 19 | + return filter.matches(t.toString()); |
20 | 20 | } |
21 | 21 | |
22 | 22 | public String getName() { |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/NameMaps.java |
— | — | @@ -20,6 +20,7 @@ |
21 | 21 | import de.brightbyte.data.Functor; |
22 | 22 | import de.brightbyte.data.KeyDigestingValueStore; |
23 | 23 | import de.brightbyte.data.KeyValueStore; |
| 24 | +import de.brightbyte.data.LongIntLookup; |
24 | 25 | import de.brightbyte.data.MapLookup; |
25 | 26 | import de.brightbyte.data.XorFold32; |
26 | 27 | import de.brightbyte.data.XorFold64; |
— | — | @@ -42,8 +43,8 @@ |
43 | 44 | return new HashMap<String, V>(); |
44 | 45 | }*/ |
45 | 46 | |
46 | | - public static <V>KeyValueStore<String, V> newStore(String storeParams, String lang) { |
47 | | - KeyValueStore<String, V> store = null; |
| 47 | + public static KeyValueStore<String, Integer> newStore(String storeParams, String lang) { |
| 48 | + KeyValueStore<String, Integer> store = null; |
48 | 49 | |
49 | 50 | String[] tt = storeParams.split("[,;|+/ &]+"); |
50 | 51 | |
— | — | @@ -51,7 +52,7 @@ |
52 | 53 | params.addAll(Arrays.asList(tt)); |
53 | 54 | |
54 | 55 | if (params.contains("none") || params.contains("null")) store = null; |
55 | | - else if (params.contains("string")) store = new MapLookup<String, V>(new HashMap<String, V>()); |
| 56 | + else if (params.contains("string")) store = new MapLookup<String, Integer>(new HashMap<String, Integer>()); |
56 | 57 | else if (params.contains("utf8") || params.contains("utf16")) { |
57 | 58 | //initial digest turns string into UTF-8 bytes |
58 | 59 | Functor<byte[], String> digest; |
— | — | @@ -72,16 +73,19 @@ |
73 | 74 | throw new RuntimeException(e); |
74 | 75 | } |
75 | 76 | |
76 | | - if (params.contains("fold64") || params.contains("fold32")) { //fold into Long |
77 | | - Functor<? extends Number, byte[]> fold; |
| 77 | + if (params.contains("fold64")) { //fold into Long |
| 78 | + Functor<Long, byte[]> fold; |
| 79 | + fold = XorFold64.instance; |
78 | 80 | |
79 | | - if (params.contains("fold32")) fold = XorFold32.instance; |
80 | | - else fold = XorFold64.instance; |
81 | | - |
82 | | - Functor<Number, String> convert = new Functor.Composite<Number, byte[], String>(digest, fold); |
| 81 | + Functor<Long, String> convert = new Functor.Composite<Long, byte[], String>(digest, fold); |
83 | 82 | |
84 | | - MapLookup<Number, V> numStore = new MapLookup<Number, V>(new HashMap<Number, V>()); |
85 | | - store = new KeyDigestingValueStore<String, Number, V>(numStore, convert); |
| 83 | + if (params.contains("primitive")) { |
| 84 | + LongIntLookup<Long> numStore = new LongIntLookup<Long>(); |
| 85 | + store = new KeyDigestingValueStore<String, Long, Integer>(numStore, convert); |
| 86 | + } else { |
| 87 | + MapLookup<Long, Integer> numStore = new MapLookup<Long, Integer>(new HashMap<Long, Integer>()); |
| 88 | + store = new KeyDigestingValueStore<String, Long, Integer>(numStore, convert); |
| 89 | + } |
86 | 90 | } else { //keep bytes, wrap in ByteArray |
87 | 91 | if (params.contains("wrap8")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(8)); |
88 | 92 | else if (params.contains("wrap6")) digest = new Functor.Composite<byte[], byte[], String>(digest, new XorWrap(6)); |
— | — | @@ -92,8 +96,8 @@ |
93 | 97 | Functor<ByteString, String> convert = new Functor.Composite<ByteString, byte[], String>(digest, ByteString.wrap); |
94 | 98 | |
95 | 99 | //set up the store |
96 | | - MapLookup<ByteString, V> byteStore = new MapLookup<ByteString, V>(new HashMap<ByteString, V>()); |
97 | | - store = new KeyDigestingValueStore<String, ByteString, V>(byteStore, convert); |
| 100 | + MapLookup<ByteString, Integer> byteStore = new MapLookup<ByteString, Integer>(new HashMap<ByteString, Integer>()); |
| 101 | + store = new KeyDigestingValueStore<String, ByteString, Integer>(byteStore, convert); |
98 | 102 | } |
99 | 103 | } else { |
100 | 104 | throw new IllegalArgumentException("bad store spec: "+storeParams+"; expected 'none' or 'string' or 'utf8' or 'utf16' as part of the type spec"); |
Index: trunk/WikiWord/WikiWordBuilder/src/main/java/de/brightbyte/wikiword/builder/util/IdManagerBenchmark.java |
— | — | @@ -10,10 +10,7 @@ |
11 | 11 | import java.util.HashMap; |
12 | 12 | import java.util.Map; |
13 | 13 | |
14 | | -import javolution.util.FastMap; |
15 | | - |
16 | 14 | import org.ardverk.collection.PatriciaTrie; |
17 | | -import org.ardverk.collection.StringKeyAnalyzer; |
18 | 15 | |
19 | 16 | import de.brightbyte.audit.DebugUtil; |
20 | 17 | import de.brightbyte.data.Codec; |
— | — | @@ -68,8 +65,8 @@ |
69 | 66 | Map<String, Integer> map; |
70 | 67 | |
71 | 68 | if (mode.equals("hash")) map = new HashMap<String, Integer>(); |
72 | | - else if (mode.equals("fast")) map = new FastMap<String, Integer>(); |
73 | | - else if (mode.equals("trie")) map = new PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE); |
| 69 | + //else if (mode.equals("fast")) map = new FastMap<String, Integer>(); |
| 70 | + //else if (mode.equals("trie")) map = new PatriciaTrie<String, Integer>(StringKeyAnalyzer.INSTANCE); |
74 | 71 | else if (mode.equals("rtrie")) map = new PatriciaTrie<String, Integer>(ReverseStringKeyAnalyzer.INSTANCE); |
75 | 72 | else if (mode.equals("terse")) map = new TerseIdMap<String>(String.class, NaturalComparator.<String>instance()); |
76 | 73 | else throw new IllegalArgumentException("unknown mode: "+mode); |
— | — | @@ -82,8 +79,8 @@ |
83 | 80 | CharsetCodec converter = new CharsetCodec(enc); |
84 | 81 | |
85 | 82 | if (mode.equals("hash")) map = new HashMap<byte[], Integer>(); |
86 | | - else if (mode.equals("fast")) map = new FastMap<byte[], Integer>(); |
87 | | - else if (mode.equals("trie")) map = new PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE); |
| 83 | + //else if (mode.equals("fast")) map = new FastMap<byte[], Integer>(); |
| 84 | + //else if (mode.equals("trie")) map = new PatriciaTrie<byte[], Integer>(ByteArrayKeyAnalyzer.INSTANCE); |
88 | 85 | else if (mode.equals("rtrie")) throw new IllegalArgumentException("Reverte Trie is not yet supported for byte arrays"); |
89 | 86 | else if (mode.equals("terse")) map = new TerseIdMap<byte[]>(byte[].class, ArrayComparator.BYTES); |
90 | 87 | else throw new IllegalArgumentException("unknown mode: "+mode); |
Index: trunk/WikiWord/WikiWordBuilder/.classpath |
— | — | @@ -9,12 +9,7 @@ |
10 | 10 | <classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/3.8.1"/> |
11 | 11 | <classpathentry combineaccessrules="false" kind="src" path="/BrightByteDB"/> |
12 | 12 | <classpathentry kind="var" path="M2_REPO/mysql/mysql-connector-java/3.1.11/mysql-connector-java-3.1.11.jar"/> |
13 | | - <classpathentry kind="lib" path="lib/patricia-trie-0.1.jar" sourcepath="/home/daniel/src/patricia-trie-0.1/patricia-trie-0.1.jar"> |
14 | | - <attributes> |
15 | | - <attribute name="javadoc_location" value="file:/home/daniel/src/patricia-trie-0.1/api/"/> |
16 | | - </attributes> |
17 | | - </classpathentry> |
18 | | - <classpathentry kind="var" path="M2_REPO/org/javolution/javolution/5.2.6/javolution-5.2.6.jar"/> |
19 | | - <classpathentry kind="lib" path="lib/jzlib-1.0.7.jar" sourcepath="/home/daniel/src/jzlib-1.0.7"/> |
| 13 | + <classpathentry kind="var" path="M2_REPO/trove/trove/3.0.0a3/trove-3.0.0a3.jar"/> |
| 14 | + <classpathentry kind="var" path="M2_REPO/kapsi/patricia-trie/0.1/patricia-trie-0.1.jar"/> |
20 | 15 | <classpathentry kind="output" path="bin"/> |
21 | 16 | </classpath> |
Index: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.pom |
— | — | @@ -0,0 +1,9 @@ |
| 2 | +<project xmlns="http://maven.apache.org/POM/4.0.0" |
| 3 | + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| 4 | + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
| 5 | + |
| 6 | + <modelVersion>4.0.0</modelVersion> |
| 7 | + <groupId>kapsi</groupId> |
| 8 | + <artifactId>patricia-trie</artifactId> |
| 9 | + <version>0.1</version> |
| 10 | +</project> |
Index: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1-LICENSE-2.0.txt |
— | — | @@ -0,0 +1,202 @@ |
| 2 | + |
| 3 | + Apache License |
| 4 | + Version 2.0, January 2004 |
| 5 | + http://www.apache.org/licenses/ |
| 6 | + |
| 7 | + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
| 8 | + |
| 9 | + 1. Definitions. |
| 10 | + |
| 11 | + "License" shall mean the terms and conditions for use, reproduction, |
| 12 | + and distribution as defined by Sections 1 through 9 of this document. |
| 13 | + |
| 14 | + "Licensor" shall mean the copyright owner or entity authorized by |
| 15 | + the copyright owner that is granting the License. |
| 16 | + |
| 17 | + "Legal Entity" shall mean the union of the acting entity and all |
| 18 | + other entities that control, are controlled by, or are under common |
| 19 | + control with that entity. For the purposes of this definition, |
| 20 | + "control" means (i) the power, direct or indirect, to cause the |
| 21 | + direction or management of such entity, whether by contract or |
| 22 | + otherwise, or (ii) ownership of fifty percent (50%) or more of the |
| 23 | + outstanding shares, or (iii) beneficial ownership of such entity. |
| 24 | + |
| 25 | + "You" (or "Your") shall mean an individual or Legal Entity |
| 26 | + exercising permissions granted by this License. |
| 27 | + |
| 28 | + "Source" form shall mean the preferred form for making modifications, |
| 29 | + including but not limited to software source code, documentation |
| 30 | + source, and configuration files. |
| 31 | + |
| 32 | + "Object" form shall mean any form resulting from mechanical |
| 33 | + transformation or translation of a Source form, including but |
| 34 | + not limited to compiled object code, generated documentation, |
| 35 | + and conversions to other media types. |
| 36 | + |
| 37 | + "Work" shall mean the work of authorship, whether in Source or |
| 38 | + Object form, made available under the License, as indicated by a |
| 39 | + copyright notice that is included in or attached to the work |
| 40 | + (an example is provided in the Appendix below). |
| 41 | + |
| 42 | + "Derivative Works" shall mean any work, whether in Source or Object |
| 43 | + form, that is based on (or derived from) the Work and for which the |
| 44 | + editorial revisions, annotations, elaborations, or other modifications |
| 45 | + represent, as a whole, an original work of authorship. For the purposes |
| 46 | + of this License, Derivative Works shall not include works that remain |
| 47 | + separable from, or merely link (or bind by name) to the interfaces of, |
| 48 | + the Work and Derivative Works thereof. |
| 49 | + |
| 50 | + "Contribution" shall mean any work of authorship, including |
| 51 | + the original version of the Work and any modifications or additions |
| 52 | + to that Work or Derivative Works thereof, that is intentionally |
| 53 | + submitted to Licensor for inclusion in the Work by the copyright owner |
| 54 | + or by an individual or Legal Entity authorized to submit on behalf of |
| 55 | + the copyright owner. For the purposes of this definition, "submitted" |
| 56 | + means any form of electronic, verbal, or written communication sent |
| 57 | + to the Licensor or its representatives, including but not limited to |
| 58 | + communication on electronic mailing lists, source code control systems, |
| 59 | + and issue tracking systems that are managed by, or on behalf of, the |
| 60 | + Licensor for the purpose of discussing and improving the Work, but |
| 61 | + excluding communication that is conspicuously marked or otherwise |
| 62 | + designated in writing by the copyright owner as "Not a Contribution." |
| 63 | + |
| 64 | + "Contributor" shall mean Licensor and any individual or Legal Entity |
| 65 | + on behalf of whom a Contribution has been received by Licensor and |
| 66 | + subsequently incorporated within the Work. |
| 67 | + |
| 68 | + 2. Grant of Copyright License. Subject to the terms and conditions of |
| 69 | + this License, each Contributor hereby grants to You a perpetual, |
| 70 | + worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
| 71 | + copyright license to reproduce, prepare Derivative Works of, |
| 72 | + publicly display, publicly perform, sublicense, and distribute the |
| 73 | + Work and such Derivative Works in Source or Object form. |
| 74 | + |
| 75 | + 3. Grant of Patent License. Subject to the terms and conditions of |
| 76 | + this License, each Contributor hereby grants to You a perpetual, |
| 77 | + worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
| 78 | + (except as stated in this section) patent license to make, have made, |
| 79 | + use, offer to sell, sell, import, and otherwise transfer the Work, |
| 80 | + where such license applies only to those patent claims licensable |
| 81 | + by such Contributor that are necessarily infringed by their |
| 82 | + Contribution(s) alone or by combination of their Contribution(s) |
| 83 | + with the Work to which such Contribution(s) was submitted. If You |
| 84 | + institute patent litigation against any entity (including a |
| 85 | + cross-claim or counterclaim in a lawsuit) alleging that the Work |
| 86 | + or a Contribution incorporated within the Work constitutes direct |
| 87 | + or contributory patent infringement, then any patent licenses |
| 88 | + granted to You under this License for that Work shall terminate |
| 89 | + as of the date such litigation is filed. |
| 90 | + |
| 91 | + 4. Redistribution. You may reproduce and distribute copies of the |
| 92 | + Work or Derivative Works thereof in any medium, with or without |
| 93 | + modifications, and in Source or Object form, provided that You |
| 94 | + meet the following conditions: |
| 95 | + |
| 96 | + (a) You must give any other recipients of the Work or |
| 97 | + Derivative Works a copy of this License; and |
| 98 | + |
| 99 | + (b) You must cause any modified files to carry prominent notices |
| 100 | + stating that You changed the files; and |
| 101 | + |
| 102 | + (c) You must retain, in the Source form of any Derivative Works |
| 103 | + that You distribute, all copyright, patent, trademark, and |
| 104 | + attribution notices from the Source form of the Work, |
| 105 | + excluding those notices that do not pertain to any part of |
| 106 | + the Derivative Works; and |
| 107 | + |
| 108 | + (d) If the Work includes a "NOTICE" text file as part of its |
| 109 | + distribution, then any Derivative Works that You distribute must |
| 110 | + include a readable copy of the attribution notices contained |
| 111 | + within such NOTICE file, excluding those notices that do not |
| 112 | + pertain to any part of the Derivative Works, in at least one |
| 113 | + of the following places: within a NOTICE text file distributed |
| 114 | + as part of the Derivative Works; within the Source form or |
| 115 | + documentation, if provided along with the Derivative Works; or, |
| 116 | + within a display generated by the Derivative Works, if and |
| 117 | + wherever such third-party notices normally appear. The contents |
| 118 | + of the NOTICE file are for informational purposes only and |
| 119 | + do not modify the License. You may add Your own attribution |
| 120 | + notices within Derivative Works that You distribute, alongside |
| 121 | + or as an addendum to the NOTICE text from the Work, provided |
| 122 | + that such additional attribution notices cannot be construed |
| 123 | + as modifying the License. |
| 124 | + |
| 125 | + You may add Your own copyright statement to Your modifications and |
| 126 | + may provide additional or different license terms and conditions |
| 127 | + for use, reproduction, or distribution of Your modifications, or |
| 128 | + for any such Derivative Works as a whole, provided Your use, |
| 129 | + reproduction, and distribution of the Work otherwise complies with |
| 130 | + the conditions stated in this License. |
| 131 | + |
| 132 | + 5. Submission of Contributions. Unless You explicitly state otherwise, |
| 133 | + any Contribution intentionally submitted for inclusion in the Work |
| 134 | + by You to the Licensor shall be under the terms and conditions of |
| 135 | + this License, without any additional terms or conditions. |
| 136 | + Notwithstanding the above, nothing herein shall supersede or modify |
| 137 | + the terms of any separate license agreement you may have executed |
| 138 | + with Licensor regarding such Contributions. |
| 139 | + |
| 140 | + 6. Trademarks. This License does not grant permission to use the trade |
| 141 | + names, trademarks, service marks, or product names of the Licensor, |
| 142 | + except as required for reasonable and customary use in describing the |
| 143 | + origin of the Work and reproducing the content of the NOTICE file. |
| 144 | + |
| 145 | + 7. Disclaimer of Warranty. Unless required by applicable law or |
| 146 | + agreed to in writing, Licensor provides the Work (and each |
| 147 | + Contributor provides its Contributions) on an "AS IS" BASIS, |
| 148 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| 149 | + implied, including, without limitation, any warranties or conditions |
| 150 | + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
| 151 | + PARTICULAR PURPOSE. You are solely responsible for determining the |
| 152 | + appropriateness of using or redistributing the Work and assume any |
| 153 | + risks associated with Your exercise of permissions under this License. |
| 154 | + |
| 155 | + 8. Limitation of Liability. In no event and under no legal theory, |
| 156 | + whether in tort (including negligence), contract, or otherwise, |
| 157 | + unless required by applicable law (such as deliberate and grossly |
| 158 | + negligent acts) or agreed to in writing, shall any Contributor be |
| 159 | + liable to You for damages, including any direct, indirect, special, |
| 160 | + incidental, or consequential damages of any character arising as a |
| 161 | + result of this License or out of the use or inability to use the |
| 162 | + Work (including but not limited to damages for loss of goodwill, |
| 163 | + work stoppage, computer failure or malfunction, or any and all |
| 164 | + other commercial damages or losses), even if such Contributor |
| 165 | + has been advised of the possibility of such damages. |
| 166 | + |
| 167 | + 9. Accepting Warranty or Additional Liability. While redistributing |
| 168 | + the Work or Derivative Works thereof, You may choose to offer, |
| 169 | + and charge a fee for, acceptance of support, warranty, indemnity, |
| 170 | + or other liability obligations and/or rights consistent with this |
| 171 | + License. However, in accepting such obligations, You may act only |
| 172 | + on Your own behalf and on Your sole responsibility, not on behalf |
| 173 | + of any other Contributor, and only if You agree to indemnify, |
| 174 | + defend, and hold each Contributor harmless for any liability |
| 175 | + incurred by, or claims asserted against, such Contributor by reason |
| 176 | + of your accepting any such warranty or additional liability. |
| 177 | + |
| 178 | + END OF TERMS AND CONDITIONS |
| 179 | + |
| 180 | + APPENDIX: How to apply the Apache License to your work. |
| 181 | + |
| 182 | + To apply the Apache License to your work, attach the following |
| 183 | + boilerplate notice, with the fields enclosed by brackets "[]" |
| 184 | + replaced with your own identifying information. (Don't include |
| 185 | + the brackets!) The text should be enclosed in the appropriate |
| 186 | + comment syntax for the file format. We also recommend that a |
| 187 | + file or class name and description of purpose be included on the |
| 188 | + same "printed page" as the copyright notice for easier |
| 189 | + identification within third-party archives. |
| 190 | + |
| 191 | + Copyright [yyyy] [name of copyright owner] |
| 192 | + |
| 193 | + Licensed under the Apache License, Version 2.0 (the "License"); |
| 194 | + you may not use this file except in compliance with the License. |
| 195 | + You may obtain a copy of the License at |
| 196 | + |
| 197 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 198 | + |
| 199 | + Unless required by applicable law or agreed to in writing, software |
| 200 | + distributed under the License is distributed on an "AS IS" BASIS, |
| 201 | + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 202 | + See the License for the specific language governing permissions and |
| 203 | + limitations under the License. |
Index: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.jar |
Cannot display: file marked as a binary type. |
svn:mime-type = application/octet-stream |
Property changes on: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.jar |
___________________________________________________________________ |
Name: svn:mime-type |
1 | 204 | + application/octet-stream |
Index: trunk/WikiWord/WikiWordBuilder/contrib/patricia-trie-0.1.url |
— | — | @@ -0,0 +1 @@ |
| 2 | +http://code.google.com/p/patricia-trie/ |
Index: trunk/WikiWord/WikiWordBuilder/contrib/install-all |
— | — | @@ -0,0 +1,6 @@ |
| 2 | +#!/bin/bash |
| 3 | + |
| 4 | +for pom in *.pom; do |
| 5 | + jar="${pom%%.pom}.jar" |
| 6 | + mvn install:install-file -Dfile="$jar" -DpomFile="$pom" |
| 7 | +done |
Property changes on: trunk/WikiWord/WikiWordBuilder/contrib/install-all |
___________________________________________________________________ |
Name: svn:executable |
1 | 8 | + * |
Index: trunk/WikiWord/WikiWordBuilder/contrib/README |
— | — | @@ -0,0 +1,7 @@ |
| 2 | +This directory contains libraries that are not readily available from |
| 3 | +a well-known Maven repository. For each jar file, there should be |
| 4 | +a corresponding pom file as well as a license text. |
| 5 | + |
| 6 | +Before using Apache Maven, please install these libraries into your local |
| 7 | +Maven repository. You can use the script "install-all" to do this. Maven |
| 8 | +will automatically download any other libraries required for building. |
\ No newline at end of file |
Property changes on: trunk/WikiWord/WikiWordBuilder/contrib |
___________________________________________________________________ |
Name: svn:mergeinfo |
1 | 9 | + |
Index: trunk/WikiWord/WikiWordBuilder/tweaks.properties.sample |
— | — | @@ -86,9 +86,10 @@ |
87 | 87 | #idStoreParameters: |
88 | 88 | # basic: string (default), utf8, or utf16 |
89 | 89 | # for utf8 and utf16: md5, sha1, or huffman (or nothing) |
90 | | -# for utf8 and utf16: wrap8, fold64 |
91 | | -# "utf16+md5+fold64" uses about half as much memory as "string" |
92 | | -#dbstore.idManager.idStoreParameters="utf16+md5+fold64" |
| 90 | +# for utf8 and utf16: wrap8 (wrap to 8 bytes), fold64 (fold into a single long value) |
| 91 | +# for fold64: primitive (use GNU Trove primitive hash) |
| 92 | +# "utf16+md5+fold64+primitive" uses about one third of the memory used by "string" |
| 93 | +#dbstore.idManager.idStoreParameters="utf16+md5+fold64+primitive" |
93 | 94 | |
94 | 95 | ### CycleFinder ##################################### |
95 | 96 | dbstore.CycleFinder.levelWarningThreshold=32 |