[Mulgara-svn] r872 - in projects/xa2/object-pool: scratch src src/trie
andrae at mulgara.org
andrae at mulgara.org
Thu May 1 04:46:00 UTC 2008
Author: andrae
Date: 2008-04-30 21:45:55 -0700 (Wed, 30 Apr 2008)
New Revision: 872
Added:
projects/xa2/object-pool/scratch/MemoryTrie.java
projects/xa2/object-pool/scratch/MemoryTrieTest.java
projects/xa2/object-pool/src/trie/
projects/xa2/object-pool/src/trie/ByteMap.java
projects/xa2/object-pool/src/trie/ByteMapTest.java
projects/xa2/object-pool/src/trie/CompBlockTrie.java
projects/xa2/object-pool/src/trie/CompBlockTrieTest.java
projects/xa2/object-pool/src/trie/CompMemTrie.java
projects/xa2/object-pool/src/trie/CompMemTrieTest.java
projects/xa2/object-pool/src/trie/OnesTable.dat
Removed:
projects/xa2/object-pool/src/ByteMap.java
projects/xa2/object-pool/src/ByteMapTest.java
projects/xa2/object-pool/src/CompBlockTrie.java
projects/xa2/object-pool/src/CompBlockTrieTest.java
projects/xa2/object-pool/src/CompMemTrie.java
projects/xa2/object-pool/src/CompMemTrieTest.java
projects/xa2/object-pool/src/MemoryTrie.java
projects/xa2/object-pool/src/MemoryTrieTest.java
projects/xa2/object-pool/src/OnesTable.dat
Log:
Started breaking out the classes into packages.
Copied: projects/xa2/object-pool/scratch/MemoryTrie.java (from rev 868, projects/xa2/object-pool/src/MemoryTrie.java)
===================================================================
--- projects/xa2/object-pool/scratch/MemoryTrie.java (rev 0)
+++ projects/xa2/object-pool/scratch/MemoryTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,239 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Implements a basic in-memory trie - uses HashMaps to implement trie nodes.
+ */
+public class MemoryTrie {
+ @SuppressWarnings("serial")
+ public static class NotFound extends Exception {};
+
+ private TrieNode root;
+
+ public MemoryTrie() {
+ this.root = null;
+ }
+
+ public void insert(byte[] key, long value) {
+// System.err.println("MemoryTrie # Inserting: " + Arrays.hashCode(key) + " :: " + value);
+ if (root == null) {
+ root = new TrieLeaf(key, value);
+ } else {
+ try {
+ root.insert(key, value);
+ } catch (TrieNode.InsertAbove k) {
+ root = new TrieBranch(root, key, value);
+ }
+ }
+ }
+
+ public long lookup(byte[] key) throws NotFound {
+// System.err.println("MemoryTrie # Lookup: " + Arrays.hashCode(key));
+ if (root == null) {
+ throw new NotFound();
+ }
+
+ return root.lookup(key);
+ }
+
+ private abstract static class TrieNode {
+ @SuppressWarnings("serial")
+ protected static class InsertAbove extends Exception {} ;
+ protected static final InsertAbove cont = new InsertAbove();
+
+ protected TrieLeaf least;
+
+ public void insert(byte[] key, long value) throws InsertAbove {
+ insert(new TrieLeaf(key, value), 0);
+ }
+
+ public long lookup(byte[] key) throws NotFound {
+ return lookup(key, 0);
+ }
+
+ protected abstract void insert(TrieLeaf node, int parentLcd) throws InsertAbove;
+ protected abstract long lookup(byte[] key, int parentLcd) throws NotFound;
+
+ protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+ if (lhsOff < 0 || rhsOff < 0) {
+ return false;
+ }
+ if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
+ }
+
+ return true;
+ }
+ }
+
+ private static class TrieBranch extends TrieNode {
+ private int offset;
+ private Map<Byte, TrieNode> children;
+ private TrieLeaf term;
+
+ public TrieBranch(byte[] key, long value) {
+ super();
+ this.children = new HashMap<Byte, TrieNode>();
+ this.term = new TrieLeaf(key, value);
+ this.offset = term.key.length;
+ this.least = this.term;
+ }
+
+ public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
+ this(oldRoot, new TrieLeaf(key, value));
+ }
+
+ private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
+ super();
+ assert oldNode != null;
+ assert newNode != null;
+
+ offset = 0;
+ children = new HashMap<Byte, TrieNode>();
+ term = null;
+ least = null;
+
+ byte[] lhs = oldNode.least.key;
+ byte[] rhs = newNode.key;
+
+ int i = 0;
+ while (i < lhs.length && i < rhs.length) {
+ if (lhs[i] != rhs[i]) {
+ offset = i;
+ children.put(lhs[i], oldNode);
+ children.put(rhs[i], newNode);
+
+ least = lhs[i] < rhs[i] ? oldNode.least : newNode;
+
+ return; // Escape here.
+ }
+ i++;
+ }
+
+ if (i < lhs.length) {
+ offset = i;
+ children.put(lhs[i], oldNode);
+ term = newNode;
+ least = newNode;
+ } else if (i < rhs.length) {
+ if (oldNode instanceof TrieLeaf) {
+ offset = i;
+ children.put(rhs[i], newNode);
+ term = (TrieLeaf)oldNode;
+ least = (TrieLeaf)oldNode;
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
+ }
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with equal children");
+ }
+ }
+
+ protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+ if (!regionMatches(least.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+ throw cont;
+ } else {
+ // new node matches the lcp of this node.
+ if (node.key.length == offset) {
+ // new node is expected to terminate here.
+ if (term == null) {
+ term = node;
+ least = node;
+ } else {
+ term.insert(node, offset);
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ TrieNode child = children.get(node.key[offset]);
+ if (child == null) {
+ // this is the first node to be inserted on this branching key.
+ children.put(node.key[offset], node);
+ } else {
+ try {
+ // there is an existing child node branching on this branching key.
+ child.insert(node, offset);
+ } catch (InsertAbove k) {
+ // as every child node shares a common lcp, any child will suffice when preparing the new
+ // lcp/offset of the new node. The least node is only required as the new parent's least node
+ // will be the smallest of the inserted node and this node's least node.
+ children.put(node.key[offset], new TrieBranch(child, node));
+ }
+ }
+ }
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd) throws NotFound {
+ if (!regionMatches(least.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+ throw new NotFound();
+ } else {
+ // new node matches the lcp of this node.
+ TrieNode child;
+ if (key.length == offset) {
+ // new node is expected to terminate here.
+ if (term != null) {
+ return term.value;
+ } else {
+ throw new NotFound();
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ child = children.get(key[offset]);
+ if (child != null) {
+ return child.lookup(key, offset);
+ } else {
+ throw new NotFound();
+ }
+ }
+ }
+ }
+
+ public String toString() {
+ return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and least[" + least + "]";
+ }
+ }
+
+ private static class TrieLeaf extends TrieNode {
+ final byte[] key;
+ final long value;
+
+ public TrieLeaf(byte[] key, long value) {
+ super();
+ this.key = new byte[key.length];
+ System.arraycopy(key, 0, this.key, 0, key.length);
+ this.value = value;
+ this.least = this;
+ }
+
+ protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+ if (key.length != node.key.length) {
+ throw cont;
+ } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+ throw cont;
+ } else if (value == node.value) {
+ return; // Duplicate key/value pair.
+ } else {
+ throw new IllegalArgumentException("Attempt to insert multiple values for same key");
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd) {
+ assert Arrays.equals(this.key, key);
+ return value;
+ }
+
+ public String toString() {
+ return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
+ }
+ }
+}
Copied: projects/xa2/object-pool/scratch/MemoryTrieTest.java (from rev 868, projects/xa2/object-pool/src/MemoryTrieTest.java)
===================================================================
--- projects/xa2/object-pool/scratch/MemoryTrieTest.java (rev 0)
+++ projects/xa2/object-pool/scratch/MemoryTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,283 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the MemoryTrie.
+ */
+public class MemoryTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithFile(new File("../scratch/propernames.uniq"));
+ testWithFile(new File("../scratch/connectives.uniq"));
+ testWithFile(new File("../scratch/web2a.uniq"));
+ testWithFile(new File("../scratch/web2.uniq"));
+ testWithRandomFile(new File("/dev/urandom"));
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ public static void testWithFile(File file) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ while (name != null) {
+ namesMap.put(name.getBytes(), n);
+ namesTrie.insert(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ }
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ System.out.println("Checking lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+ for (byte[] key : namesMap.keySet()) {
+ if (namesTrie.lookup(key) != namesMap.get(key)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public boolean equals(Object o) {
+// System.err.print("Bytes::equals() # ");
+ if (o instanceof Bytes) {
+// System.err.println("Comparing " + Arrays.hashCode(bytes) + " with " + Arrays.hashCode(((Bytes)o).bytes));
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+// System.err.println("Bytes # Calculated hashcode for: " + hc);
+ return hc;
+ }
+ }
+
+ public static void testWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < 5000) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+ }
+
+ for (int i = 0; i < 10; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.println(" " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
+ System.out.println(" " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+ }
+
+ for (int ii = 0; ii < 10; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.println(" " + nLL + " very long entries ave: " + (_avlLL / nLL) + " in: " + _aggregateLL + "ms");
+ System.out.println(" " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
+ System.out.println(" " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
+ System.out.println(" " + nSS + " very short entries ave: " + (_avlSS / nSS) + " in: " + _aggregateSS + "ms");
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
Deleted: projects/xa2/object-pool/src/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/ByteMap.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/ByteMap.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,172 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 8th April 2008
- */
-
-import static gen.OnesTable.*;
-
-import java.nio.ByteBuffer;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-
-
-/**
- * Represents a map from a byte to data.
- * ie. a compressed 256 element array.
- */
-public class ByteMap<T> implements Iterable<T> {
- short high;
- short[] low;
- T[] data;
-
- @SuppressWarnings("unchecked")
- public ByteMap() {
- high = 0;
- low = new short[0];
- data = (T[])new Object[0];
- }
-
- @SuppressWarnings("unchecked")
- public ByteMap(ByteBuffer index, Map<Short, T> nodeMap) {
- high = index.getShort();
- low = new short[ones(high)];
- short dataCount = 0;
- for (int i = 0; i < low.length; i++) {
- low[i] = index.getShort();
- dataCount += ones(low[i]);
- }
- data = (T[])new Object[dataCount];
- for (int i = 0; i < data.length; i++) {
- short t = index.getShort();
- data[i] = nodeMap.get(t);
- }
- }
-
- /**
- * @param datum Value to be associated with key. Note: Must not be null.
- * @return the old value associated with key, or null if this is a new mapping.
- */
- @SuppressWarnings("unchecked")
- public T insert(byte key, T datum) {
- if (datum == null) {
- throw new IllegalArgumentException("Attempted to insert null into ByteMap");
- }
-
- int h = hiNibbleAsBitInShort(key);
- if ((high & h) != 0) {
- // We already have a low entry for this hi-nibble.
- int lowOffset = ones(HIGH, high, key);
- int dataOffset = 0;
- for (int i = 0; i < lowOffset; i++) {
- dataOffset += ones(low[i]);
- }
- dataOffset += ones(LOW, low[lowOffset], key);
-
- int l = lowNibbleAsBitInShort(key);
- if ((low[lowOffset] & l) != 0) {
- // We already have a data entry for this byte.
- T t = data[dataOffset];
- data[dataOffset] = datum;
-
- return t;
- } else {
- // We need to insert a data entry for this byte.
- // Extend data array.
- T[] newData = (T[])new Object[data.length + 1];
- System.arraycopy(data, 0, newData, 0, dataOffset);
- System.arraycopy(data, dataOffset, newData, dataOffset + 1, data.length - dataOffset);
- data = newData;
- // Set new entry in data array; indicate it in low entry.
- low[lowOffset] = (short)(low[lowOffset] | l);
- data[dataOffset] = datum;
-
- return null;
- }
- } else {
- // We need to insert a new low entry for this hi-nibble.
- short[] newLow = new short[low.length + 1];
- int lowOffset = ones(HIGH, high, key);
- System.arraycopy(low, 0, newLow, 0, lowOffset);
- System.arraycopy(low, lowOffset, newLow, lowOffset + 1, low.length - lowOffset);
- high = (short)(high | h);
- low = newLow;
-
- return insert(key, datum);
- }
- }
-
- /**
- * Returns the forwarding pointer corresponding to the key in this trie node.
- * Note: The result is a *signed* short. +ve pointers correspond to references to Trie-Nodes; -ve
- * pointers correspond to references to child pointers.
- * @return Value corresponding to key; or null if not found.
- */
- public T lookup(byte key) {
- if ((high & hiNibbleAsBitInShort(key)) == 0) {
- // We didn't find the high nibble in this map.
- return null;
- }
-
- // the low-nibble index is the ones left of the key's high-nibble bit in high.
- int lowOffset = ones(HIGH, high, key);
-
- // the ptrs index is the sum of the ones in the low-nibble array prior to low-nibble index + the ones
- // left of the key's low-nibble bit in low[lowOffset].
- if (lowOffset >= low.length) {
- return null;
- }
- if ((low[lowOffset] & lowNibbleAsBitInShort(key)) == 0) {
- // The high nibble matched, but the low nibble didn't.
- return null;
- }
-
- int dataOffset = 0;
- for (int i = 0; i < lowOffset; i++) {
- dataOffset += ones(low[i]);
- }
- dataOffset += ones(LOW, low[lowOffset], key);
-
- if (dataOffset >= data.length) {
- return null;
- } else {
- // the resulting index is a direct index into the forwarding-pointer array.
- return data[dataOffset];
- }
- }
-
- public Iterator<T> iterator() {
- return new Iterator<T>() {
- private int index = 0;
- public boolean hasNext() { return index < data.length; }
- public T next() {
- if (index < data.length) {
- return data[index++];
- } else {
- throw new NoSuchElementException("Data Iterator in ByteMap");
- }
- }
- public void remove() { throw new UnsupportedOperationException("Data Iterator in ByteMap"); }
- };
- }
-
- public int size() {
- return data.length;
- }
-
- public int memSize() {
- return 2 + 2 * low.length + 2 * data.length;
- }
-
- public int headerSize() {
- return 2 + 2 * low.length;
- }
-
- public void writeHeader(ByteBuffer bb) {
- bb.putShort(high);
- for (int i = 0; i < low.length; i++) {
- bb.putShort(low[i]);
- }
- }
-}
Deleted: projects/xa2/object-pool/src/ByteMapTest.java
===================================================================
--- projects/xa2/object-pool/src/ByteMapTest.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/ByteMapTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,182 +0,0 @@
-
-
-/**
- * Represents a map from a byte to data.
- * ie. a compressed 256 element array.
- */
-public class ByteMapTest {
- public static void main(String[] args) throws Exception {
- testEmpty();
- testSequentialImmediate();
- testSequential();
- testAlternateImmediate();
- testAlternate();
- testAlternateLowNibbleImmediate();
- testLookupNonExistentLow();
- testLookupMissedHighMatchedLow();
- testLookupMatchedHighMissedLow();
- }
-
- public static void testEmpty() throws Exception {
- System.out.println("running testEmpty...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 0x00000100; i++) {
- assertNull("Found result for i = " + i + " in empty map", byteMap.lookup((byte)i));
- }
- System.out.println("testEmpty Successful");
- }
-
- public static void testSequentialImmediate() throws Exception {
- System.out.println("running testSequentialImmediate...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 0x00000100; i++) {
- assertNull("Found double entry for i = " + i + " in sequential map", byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " in sequential map: Received " + l, new Long(i), l);
- }
- System.out.println("testSequentialImmediate Successful");
- }
-
- public static void testSequential() throws Exception {
- System.out.println("running testSequential...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 0x00000100; i++) {
- assertNull("Found double entry for i = " + i + " in sequential map", byteMap.insert((byte)i, new Long(i)));
- }
-
- for (int i = 0; i < 0x00000100; i++) {
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " in sequential map: Received " + l, new Long(i), l);
- }
- System.out.println("testSequential Successful");
- }
-
- public static void testAlternateImmediate() throws Exception {
- System.out.println("running testAlternateImmediate...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 128; i++) {
- assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
-
- int ii = 255 - i;
- assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
- Long ll = byteMap.lookup((byte)ii);
- assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
- }
- System.out.println("testAlternateImmediate Successful");
- }
-
- public static void testAlternate() throws Exception {
- System.out.println("running testAlternateImmediate...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 128; i++) {
- assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
-
- int ii = 255 - i;
- assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
- }
-
- for (int i = 0; i < 128; i++) {
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
-
- int ii = 255 - i;
- Long ll = byteMap.lookup((byte)ii);
- assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
- }
-
- System.out.println("testAlternateImmediate Successful");
- }
-
- public static void testAlternateLowNibbleImmediate() throws Exception {
- System.out.println("running testAlternateLowNibbleImmediate...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- for (int i = 0; i < 64; i++) {
- assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
-
- int ii = 127 - i;
- assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
- Long ll = byteMap.lookup((byte)ii);
- assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
- }
- System.out.println("testAlternateLowNibbleImmediate Successful");
- }
-
- public static void testLookupNonExistentLow() throws Exception {
- System.out.println("running testLookupNonExistentLow...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- int i = 0x061;
- assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
-
- i = 0x067;
- assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
-
- System.out.println("testLookupNonExistentLow Successful");
- }
-
- public static void testLookupMissedHighMatchedLow() throws Exception {
- System.out.println("running testLookupMissedHighMatchedLow...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- int i = 0x017;
- assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
-
- i = 0x007;
- assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
-
- System.out.println("testLookupMissedHighMatchedLow Successful");
- }
-
- public static void testLookupMatchedHighMissedLow() throws Exception {
- System.out.println("running testLookupMatchedHighMissedLow...");
- ByteMap<Long> byteMap = new ByteMap<Long>();
-
- int i = 0x007;
- assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
- Long l = byteMap.lookup((byte)i);
- assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
-
- i = 0x006;
- assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
-
- System.out.println("testLookupMatchedHighMissedLow Successful");
- }
-
- public static void assertNull(String str, Object obj) throws Exception {
- if (obj != null) {
- throw new IllegalStateException(str);
- }
- }
-
- public static void assertNotNull(String str, Object obj) throws Exception {
- if (obj == null) {
- throw new IllegalStateException(str);
- }
- }
-
- public static void assertTrue(String str, boolean b) throws Exception {
- if (!b) {
- throw new IllegalStateException(str);
- }
- }
-
- public static void assertEquals(String str, Object lhs, Object rhs) throws Exception {
- if (lhs == null && rhs == null) return;
- if (lhs == null || rhs == null) throw new IllegalStateException(str);
- if (!lhs.equals(rhs)) throw new IllegalStateException(str);
- }
-}
Deleted: projects/xa2/object-pool/src/CompBlockTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrie.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/CompBlockTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,342 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 22nd April 2008
- */
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Extends CompMemTrie to provide IO functionality.
- *
- * Guarantees Trie will fit within a single block, refuses to accept insert() if it would make the
- * serialized size of the node greater than the blocksize.
- */
-public class CompBlockTrie extends CompMemTrie {
- @SuppressWarnings("serial")
- public static class InsufficientSpace extends Exception
- { public InsufficientSpace(String s) { super(s); } };
-
- static final byte TYPE_BRANCH_TERM = 0x01; // Indicates a trie branch with a termination entry..
- static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
- static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
- static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
- public final static int MAGIC = 0x00020001;
- public final static int INVERT_MAGIC = 0x01000200;
-
- // Calculated as the worst case size per entry.
- // Assumes a full leaf and branch per entry.
- // Block: 2-byte type-header
- // 4-byte offset
- // 2-byte ByteMap-High
- // 4-byte ByteMap-Lows (2-entries).
- // 4-byte Children Ptrs.
- // Leaf: 2-byte type-header
- // 8-byte offset.
- // 26-bytes Total.
- // Best case of course is 10-bytes leaf + 2-bytes children in branch.
- private static short WORST_CASE_ENTRY_SIZE = 26;
-//private static short BEST_CASE_ENTRY_SIZE = 12;
-
- /** Worst-case space left in trie measured in entries */
- private short space;
-
- /**
- * The blocksize for this Block-Trie.
- * Currently the maximum block size supported is 32K, which is worst-case:
- * 1260 entries covered by a single I0 index in 32KB.
- * 1.59 million entries by a single I1 index in 40MB.
- * 2.00 billion entries by a single I2 index in 49GB.
- * 2.52 trillion entries by a single I3 index in 60TB.
- */
- private int blockSize;
-
- public CompBlockTrie(int blockSize) {
- super();
- assert blockSize > 1024;
- assert blockSize <= 32*1024;
- this.blockSize = blockSize;
- this.space = (short)((blockSize - 8) / WORST_CASE_ENTRY_SIZE); // -8 leaves room for header info.
- }
-
- public CompBlockTrie(ByteBuffer index, FileChannel data) throws IOException {
- super();
- index.rewind(); // Note sure if I should doing this here or delegating to the caller.
- int magic = index.getInt();
- if (magic != MAGIC) {
- if (magic == INVERT_MAGIC) {
- index.order(index.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
- } else {
- throw new IllegalArgumentException("Bad magic in index buffer: " + magic + ", MAGIC=" + MAGIC);
- }
- }
-
- // I should be able to replace this with a stack.
- // The child->parent relationship is implicit in the ordering of the nodes in the file.
- // The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
- // If treated as a stack once we have read the root the stack should contain only a single element - the
- // root node.
- // For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
- HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
-
- short rootLocation = -1;
- while (rootLocation == -1) {
- short location = (short)index.position();
- byte type = index.get();
- switch (type) {
- case TYPE_BRANCH_TERM:
- nodeMap.put(location, readTrieBranch(index, true, nodeMap));
- break;
- case TYPE_BRANCH_NOTERM:
- nodeMap.put(location, readTrieBranch(index, false, nodeMap));
- break;
- case TYPE_LEAF:
- nodeMap.put(location, readTrieLeaf(index, data));
- break;
- case TYPE_ROOT_LOC:
- index.get();
- rootLocation = index.getShort();
- break;
- default:
- throw new IllegalArgumentException("Invalid trie-node");
- }
- }
-
- root = nodeMap.get(rootLocation);
-
- this.space = 0; // If this gets modified it will be recalculated automaticly then.
- }
-
- public boolean insert(byte[] key, long value) {
- // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
- // trie-block, but if we haven't we should recalculate the space available.
- if (space == 0) {
- int size = totalIndexSize(root);
- space = (short)((blockSize - size - 8) / WORST_CASE_ENTRY_SIZE);
- if (space < 0) {
- throw new IllegalStateException("Overfilled CompBlockTrie");
- } else if (space == 0) {
- return false;
- }
- }
-
- super.insert(key, value);
- space--;
-
- return true;
- }
-
- public void write(ByteBuffer index, FileChannel data) throws InsufficientSpace, IOException {
- if (index.remaining() < blockSize) {
- throw new InsufficientSpace("Insufficient space remaining in buffer to write block");
- }
-
- // Temporarally set the limit to blocksize.
- int limit = index.limit();
- index.limit(index.position() + blockSize);
-
- if (root == null) {
- throw new IllegalStateException("Attempt to write empty trie");
- }
-
- int indexreq = (root == null) ? 8 : totalIndexSize(root) + 4 + 4; // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
- if (indexreq > index.remaining()) {
- System.err.printf("Index-Req:%d ; remaining:%d ; capacity:%d ; limit:%d ; position:%d\n", indexreq,
- index.remaining(), index.capacity(), index.limit(), index.position());
- throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
- }
- if (indexreq > 0x00010000) {
- throw new InsufficientSpace("Attempt to write trie index larger than 64K");
- }
-
- HashMap<TrieNode, Short> locationMap = new HashMap<TrieNode, Short>();
-
- index.putInt(MAGIC);
- if (root != null) {
- writeTrieNode(root, index, data, locationMap);
- }
- index.put(TYPE_ROOT_LOC);
- index.put((byte)0xFF);
- if (root != null) {
- index.putShort(locationMap.get(root));
- } else {
- index.putShort((short)0x0000);
- }
-
- // Set the position to the block size to ensure whole blocks are written.
- index.position(index.limit());
- // Reset the limit to its initial value.
- index.limit(limit);
- }
-
- private static Map<Class, TrieNodeWriter> writerMap = new HashMap<Class, TrieNodeWriter>();
- static {
- writerMap.put(TrieBranch.class, new TrieBranchWriter());
- writerMap.put(TrieLeaf.class, new TrieLeafWriter());
- }
-
- protected static void writeTrieNode(TrieNode node, ByteBuffer index, FileChannel data,
- Map<TrieNode, Short> locationMap) throws IOException {
- writerMap.get(node.getClass()).write(node, index, data, locationMap);
- }
-
- private static interface TrieNodeWriter {
- /**
- * This method *must* update location with the position in the index buffer it was written to.
- * This allows parents to obtain a physical reference to this node in their write methods.
- * The index buffer is the target of the Trie index itself.
- * The data buffer is the target of the byte[]/value pairs stored in the leaves. We want to keep these
- * separate for caching purposes.
- */
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException;
- }
-
- private static class TrieBranchWriter implements TrieNodeWriter {
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
- TrieBranch branch = (TrieBranch)node;
- if (branch.term != null) {
- writeTrieNode(branch.term, index, data, locationMap);
- }
- for (TrieNode child : branch.children) {
- writeTrieNode(child, index, data, locationMap);
- }
-
- locationMap.put(branch, (short)index.position());
-
- index.put((branch.term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
- index.put((byte)0xFF); // Padding to keep things short-aligned.
- index.putInt(branch.offset);
- if (branch.term != null) {
- index.putShort(locationMap.get(branch.term));
- }
- branch.children.writeHeader(index);
- for (TrieNode child : branch.children) {
- index.putShort(locationMap.get(child));
- }
- }
- }
-
- private static class TrieLeafWriter implements TrieNodeWriter {
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
- TrieLeaf leaf = (TrieLeaf)node;
-
- ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
- long keyLocation = data.position();
- dataBuf.putLong(leaf.value);
- dataBuf.putInt(leaf.key.length);
- dataBuf.put(leaf.key);
- data.write((ByteBuffer)dataBuf.flip());
-
- locationMap.put(leaf, (short)index.position());
- index.put(TYPE_LEAF);
- index.put((byte)0xFF); // Pad to 16-bit aligned.
- index.putLong(keyLocation);
- }
- }
-
- private TrieBranch readTrieBranch(ByteBuffer index, boolean hasTerm, Map<Short, TrieNode> nodeMap) throws IOException {
- TrieBranch branch = new TrieBranch();
-
- index.get(); // skip padding.
- branch.offset = index.getInt();
- if (hasTerm) {
- branch.term = (TrieLeaf)nodeMap.get(index.getShort());
- }
- branch.children = new ByteMap<TrieNode>(index, nodeMap);
- if (branch.term == null) {
- // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
- // generics which triggers the obvious ClassCastException. By extracting explicitly to an Object[]
- // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
- // insert the erroneous cast.
- Object[] d = branch.children.data;
- branch.aLeaf = ((TrieNode)(d[0])).aLeaf;
- } else {
- branch.aLeaf = branch.term;
- }
-
- return branch;
- }
-
- private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
- index.get();
- long keyLocation = index.getLong();
- data.position(keyLocation);
- // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
- ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
- data.read(bb);
- bb.flip();
- long value = bb.getLong();
- int length = bb.getInt();
- byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
- data.read(ByteBuffer.wrap(key));
-
- return new TrieLeaf(key, value, false);
- }
-
- private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
- static {
- sizerMap.put(TrieBranch.class, new TrieBranchSizer());
- sizerMap.put(TrieLeaf.class, new TrieLeafSizer());
- }
-
- protected static int totalIndexSize(TrieNode node) {
- return sizerMap.get(node.getClass()).indexSize(node);
- }
-
- protected static int totalDataSize(TrieNode node) {
- return sizerMap.get(node.getClass()).dataSize(node);
- }
-
- private static interface TrieNodeSizer {
- public int indexSize(TrieNode node);
- public int dataSize(TrieNode node);
- }
-
- private static class TrieBranchSizer implements TrieNodeSizer {
- protected int memSize(TrieBranch branch) {
- return 4 + 1 + 1 + branch.children.memSize() + (branch.term == null ? 0 : 2);
- }
-
- public int indexSize(TrieNode node) {
- TrieBranch branch = (TrieBranch)node;
- int total = memSize(branch);
- if (branch.term != null) {
- total += totalIndexSize(branch.term);
- }
- for (TrieNode child : branch.children) {
- total += totalIndexSize(child);
- }
-
- return total;
- }
-
- public int dataSize(TrieNode node) {
- TrieBranch branch = (TrieBranch)node;
- int total = 0;
- if (branch.term != null) {
- total += totalDataSize(branch.term);
- }
- for (TrieNode child : branch.children) {
- total += totalDataSize(child);
- }
-
- return total;
- }
- }
-
- private static class TrieLeafSizer implements TrieNodeSizer {
- public int indexSize(TrieNode node) {
- return 1 + 1 + 8;
- }
-
- public int dataSize(TrieNode node) {
- TrieLeaf leaf = (TrieLeaf)node;
- return 8 + 4 + leaf.key.length;
- }
- }
-}
Deleted: projects/xa2/object-pool/src/CompBlockTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrieTest.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/CompBlockTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,462 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-/**
- * Basic tests of the CompBlockTrie.
- */
-public class CompBlockTrieTest {
- public static void main(String[] args) throws Exception {
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
- testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
-// testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
- private static final int BLOCK_SIZE = 32 * 1024;
- public static void testWithLimitedFile(File file, int limit) throws Exception {
- Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
-
- FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
- FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
- ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
-
- ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
- CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
-
- System.out.println("Inserting " + limit + " lines from " + file);
-
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0xDEADBEEF00000000L;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- for (int i = 0; i < limit; i++) {
- // The use of the double loop means we pay the cost of setting up the exception handler only once per
- // block, not once per entry.
- if (name == null) {
- break;
- }
- if (trie.insert(name.getBytes(), n)) {
- // Note: exception thrown here if trie is full. So name will remain valid when we reenter loop with
- // a new trie.
- namesMap.put(name.getBytes(), n);
- name = names.readLine();
- n++;
- } else {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- }
- }
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
-
- indexFile.force(true);
- dataFile.force(true);
- indexFile.close();
- dataFile.close();
-
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- indexFile = new FileInputStream("tmp/sp_idx").getChannel();
- dataFile = new FileInputStream("tmp/sp_dat").getChannel();
- indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
- ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
-
- long _startRead = System.currentTimeMillis();
-
- while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
- indexBuffer.flip();
- CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
- readTries.add(ctrie);
- }
-
- long _endRead = System.currentTimeMillis();
-
- System.out.println("Checking lines from " + file);
- long _startLookup = System.currentTimeMillis();
-
- boolean[] valid = new boolean[1];
-
- for (byte[] key : namesMap.keySet()) {
- boolean found = false;
- for (CompBlockTrie t : tries) {
- long value = t.lookup(key, valid);
- if (valid[0] && value == namesMap.get(key)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- found = false;
- for (CompBlockTrie t : readTries) {
- long value = t.lookup(key, valid);
- if (valid[0] && value == namesMap.get(key)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Read-Trie doesn't match Map");
- }
- }
-
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) +
- " read:" + (_endRead - _startRead) +
- " lookup:" + (_endLookup - _startLookup));
- }
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public byte[] getBytes() {
- return bytes;
- }
-
- public boolean equals(Object o) {
- if (o instanceof Bytes) {
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
- return hc;
- }
-
- public String toString() {
- return Arrays.toString(bytes);
- }
- }
-
- public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompBlockTrie namesTrie = new CompBlockTrie(32*1024);
-
- System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
- FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
- ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
-
- ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
- CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < max) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
-
- if (!trie.insert(key, n)) {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- trie.insert(key, n);
- }
-
- n++;
-
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
- }
-
- for (int i = 0; i < small; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
-
- if (!trie.insert(key, n)) {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- trie.insert(key, n);
- }
- n++;
-
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
- }
- }
- }
-
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
-
- indexFile.force(true);
- dataFile.force(true);
- indexFile.close();
- dataFile.close();
-
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- indexFile = new FileInputStream("tmp/sp_idx").getChannel();
- dataFile = new FileInputStream("tmp/sp_dat").getChannel();
- indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
- ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
-
- long _startRead = System.currentTimeMillis();
-
- while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
- indexBuffer.flip();
- CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
- readTries.add(ctrie);
- }
-
- long _endRead = System.currentTimeMillis();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
-
- boolean[] valid = new boolean[1];
-
- for (Bytes keyBytes : namesMap.keySet()) {
- int i = 0;
- boolean found = false;
- for (CompBlockTrie t : readTries) {
- long value = t.lookup(keyBytes.getBytes(), valid);
- if (valid[0] && value == namesMap.get(keyBytes)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i
- + " key:" + keyBytes + " value: " + namesMap.get(keyBytes));
- }
- i++;
- }
-
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) +
- " read:" + (_endRead - _startRead) +
- " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 100; i++) {
- if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int k = 0; k < 10; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int ii = 0; ii < 1000; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
- System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
Deleted: projects/xa2/object-pool/src/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrie.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/CompMemTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,255 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 15th April 2008
- */
-
-import java.util.Arrays;
-
-/**
- * Implements an in-memory trie - uses ByteMaps to implement trie nodes.
- */
-public class CompMemTrie {
- @SuppressWarnings("serial")
- public static class NotFound extends Exception {};
- private static NotFound notfound = new NotFound();
-
- protected TrieNode root;
- public CompMemTrie() {
- this.root = null;
- }
-
- public boolean insert(byte[] key, long value) {
- if (root == null) {
- root = new TrieLeaf(key, value);
- } else {
- if (!root.insert(key, value)) {
- root = new TrieBranch(root, key, value);
- }
- }
-
- return true;
- }
-
- public long lookup(byte[] key) throws NotFound {
- boolean[] valid = new boolean[1];
- long result = lookup(key, valid);
- if (valid[0]) {
- return result;
- } else {
- throw notfound;
- }
- }
-
-
- public long lookup(byte[] key, boolean[] valid) {
- if (root == null) {
- valid[0] = false;
- return 0;
- } else {
- return root.lookup(key, valid);
- }
- }
-
- protected abstract static class TrieNode {
- protected TrieLeaf aLeaf;
-
- /**
- * @return false if we need to insert key above this node.
- */
- public boolean insert(byte[] key, long value) {
- return insert(new TrieLeaf(key, value), 0);
- }
-
- public long lookup(byte[] key, boolean[] valid) {
- return lookup(key, 0, valid);
- }
-
- protected abstract boolean insert(TrieLeaf node, int parentLcd);
- protected abstract long lookup(byte[] key, int parentLcd, boolean[] valid);
-
- protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
- if (lhsOff < 0 || rhsOff < 0) {
- return false;
- }
- if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
- return false;
- }
- for (int i = 0; i < len; i++) {
- if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
- }
-
- return true;
- }
- }
-
- protected static class TrieBranch extends TrieNode {
- protected int offset;
- protected ByteMap<TrieNode> children;
- protected TrieLeaf term;
-
- protected TrieBranch() {}
-
- public TrieBranch(byte[] key, long value) {
- this.children = new ByteMap<TrieNode>();
- this.term = new TrieLeaf(key, value);
- this.offset = term.key.length;
- this.aLeaf = this.term;
- }
-
- public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
- this(oldRoot, new TrieLeaf(key, value));
- }
-
- private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
- super();
- assert oldNode != null;
- assert newNode != null;
-
- offset = 0;
- children = new ByteMap<TrieNode>();
- term = null;
-
- // as every child node shares a common lcp, any child will suffice when preparing the new
- // lcp/offset of the new node.
- aLeaf = newNode;
-
- byte[] lhs = oldNode.aLeaf.key;
- byte[] rhs = newNode.key;
-
- int i = 0;
- while (i < lhs.length && i < rhs.length) {
- if (lhs[i] != rhs[i]) {
- offset = i;
- children.insert(lhs[i], oldNode);
- children.insert(rhs[i], newNode);
-
- return; // Escape here.
- }
- i++;
- }
-
- if (i < lhs.length) {
- offset = i;
- children.insert(lhs[i], oldNode);
- term = newNode;
- } else if (i < rhs.length) {
- if (oldNode instanceof TrieLeaf) {
- offset = i;
- children.insert(rhs[i], newNode);
- term = (TrieLeaf)oldNode;
- } else {
- throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
- }
- } else {
- throw new IllegalStateException("Attempt to create new branch node with equal children");
- }
- }
-
- protected boolean insert(TrieLeaf node, int parentLcp) {
- if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
- return false;
- } else {
- // new node matches the lcp of this node.
- if (node.key.length == offset) {
- // new node is expected to terminate here.
- if (term == null) {
- term = node;
- } else {
- return term.insert(node, offset);
- }
- } else {
- // new node is expected to terminate in one of this nodes children.
- TrieNode child = children.lookup(node.key[offset]);
- if (child == null) {
- // this is the first node to be inserted on this branching key.
- children.insert(node.key[offset], node);
- } else {
- // there is an existing child node branching on this branching key.
- if (!child.insert(node, offset)) {
- children.insert(node.key[offset], new TrieBranch(child, node));
- }
- }
- }
-
- return true;
- }
- }
-
- protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
- if (!regionMatches(aLeaf.key, parentLcd, key, parentLcd, offset - parentLcd)) {
- //FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
- valid[0] = false;
- return 0;
- } else {
- // new node matches the lcp of this node.
- TrieNode child;
- if (key.length == offset) {
- // new node is expected to terminate here.
- if (term != null) {
- valid[0] = true;
- return term.value;
- } else {
- valid[0] = false;
- return 0;
- }
- } else {
- // new node is expected to terminate in one of this nodes children.
- child = children.lookup(key[offset]);
- if (child != null) {
- return child.lookup(key, offset, valid);
- } else {
- valid[0] = false;
- return 0;
- }
- }
- }
- }
-
- public String toString() {
- return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and aLeaf[" + aLeaf + "]";
- }
- }
-
- protected static class TrieLeaf extends TrieNode {
- final byte[] key;
- final long value;
-
- protected TrieLeaf(byte[] key, long value, boolean copyKey) {
- if (copyKey) {
- this.key = new byte[key.length];
- System.arraycopy(key, 0, this.key, 0, key.length);
- } else {
- this.key = key;
- }
- this.value = value;
- this.aLeaf = this;
- }
-
- public TrieLeaf(byte[] key, long value) {
- this(key, value, true);
- }
-
- protected boolean insert(TrieLeaf node, int parentLcp) {
- if (key.length != node.key.length) {
- return false;
- } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
- return false;
- } else if (value == node.value) {
- return true; // Duplicate key/value pair.
- } else {
- throw new IllegalArgumentException("Attempt to insert multiple values for same key");
- }
- }
-
- protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
- assert Arrays.equals(this.key, key);
- valid[0] = true;
- return value;
- }
-
- public String toString() {
- return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
- }
- }
-}
Deleted: projects/xa2/object-pool/src/CompMemTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrieTest.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/CompMemTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,409 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-/**
- * Basic tests of the CompMemTrie.
- */
-public class CompMemTrieTest {
- public static void main(String[] args) throws Exception {
- testWithFile(new File("../scratch/propernames.uniq"));
- testWithFile(new File("../scratch/connectives.uniq"));
- testWithFile(new File("../scratch/web2a.uniq"));
- testWithFile(new File("../scratch/web2.uniq"));
- testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
- testWithRandomFile(new File("/dev/urandom"));
-// testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
- public static void testWithFile(File file) throws Exception {
- Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting lines from " + file);
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- while (name != null) {
- namesMap.put(name.getBytes(), n);
- namesTrie.insert(name.getBytes(), n);
- name = names.readLine();
- n++;
- }
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- System.out.println("Checking " + n + " lines from " + file);
- long _startLookup = System.currentTimeMillis();
- for (byte[] key : namesMap.keySet()) {
- if (namesTrie.lookup(key) != namesMap.get(key)) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public boolean equals(Object o) {
- if (o instanceof Bytes) {
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
- return hc;
- }
- }
-
- public static void testWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < 5000) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < 10; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < max) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < small; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 100; i++) {
- if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int k = 0; k < 10; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int ii = 0; ii < 1000; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
- System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
Deleted: projects/xa2/object-pool/src/MemoryTrie.java
===================================================================
--- projects/xa2/object-pool/src/MemoryTrie.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/MemoryTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,239 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Implements a basic in-memory trie - uses HashMaps to implement trie nodes.
- */
-public class MemoryTrie {
- @SuppressWarnings("serial")
- public static class NotFound extends Exception {};
-
- private TrieNode root;
-
- public MemoryTrie() {
- this.root = null;
- }
-
- public void insert(byte[] key, long value) {
-// System.err.println("MemoryTrie # Inserting: " + Arrays.hashCode(key) + " :: " + value);
- if (root == null) {
- root = new TrieLeaf(key, value);
- } else {
- try {
- root.insert(key, value);
- } catch (TrieNode.InsertAbove k) {
- root = new TrieBranch(root, key, value);
- }
- }
- }
-
- public long lookup(byte[] key) throws NotFound {
-// System.err.println("MemoryTrie # Lookup: " + Arrays.hashCode(key));
- if (root == null) {
- throw new NotFound();
- }
-
- return root.lookup(key);
- }
-
- private abstract static class TrieNode {
- @SuppressWarnings("serial")
- protected static class InsertAbove extends Exception {} ;
- protected static final InsertAbove cont = new InsertAbove();
-
- protected TrieLeaf least;
-
- public void insert(byte[] key, long value) throws InsertAbove {
- insert(new TrieLeaf(key, value), 0);
- }
-
- public long lookup(byte[] key) throws NotFound {
- return lookup(key, 0);
- }
-
- protected abstract void insert(TrieLeaf node, int parentLcd) throws InsertAbove;
- protected abstract long lookup(byte[] key, int parentLcd) throws NotFound;
-
- protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
- if (lhsOff < 0 || rhsOff < 0) {
- return false;
- }
- if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
- return false;
- }
- for (int i = 0; i < len; i++) {
- if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
- }
-
- return true;
- }
- }
-
- private static class TrieBranch extends TrieNode {
- private int offset;
- private Map<Byte, TrieNode> children;
- private TrieLeaf term;
-
- public TrieBranch(byte[] key, long value) {
- super();
- this.children = new HashMap<Byte, TrieNode>();
- this.term = new TrieLeaf(key, value);
- this.offset = term.key.length;
- this.least = this.term;
- }
-
- public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
- this(oldRoot, new TrieLeaf(key, value));
- }
-
- private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
- super();
- assert oldNode != null;
- assert newNode != null;
-
- offset = 0;
- children = new HashMap<Byte, TrieNode>();
- term = null;
- least = null;
-
- byte[] lhs = oldNode.least.key;
- byte[] rhs = newNode.key;
-
- int i = 0;
- while (i < lhs.length && i < rhs.length) {
- if (lhs[i] != rhs[i]) {
- offset = i;
- children.put(lhs[i], oldNode);
- children.put(rhs[i], newNode);
-
- least = lhs[i] < rhs[i] ? oldNode.least : newNode;
-
- return; // Escape here.
- }
- i++;
- }
-
- if (i < lhs.length) {
- offset = i;
- children.put(lhs[i], oldNode);
- term = newNode;
- least = newNode;
- } else if (i < rhs.length) {
- if (oldNode instanceof TrieLeaf) {
- offset = i;
- children.put(rhs[i], newNode);
- term = (TrieLeaf)oldNode;
- least = (TrieLeaf)oldNode;
- } else {
- throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
- }
- } else {
- throw new IllegalStateException("Attempt to create new branch node with equal children");
- }
- }
-
- protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
- if (!regionMatches(least.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
- throw cont;
- } else {
- // new node matches the lcp of this node.
- if (node.key.length == offset) {
- // new node is expected to terminate here.
- if (term == null) {
- term = node;
- least = node;
- } else {
- term.insert(node, offset);
- }
- } else {
- // new node is expected to terminate in one of this nodes children.
- TrieNode child = children.get(node.key[offset]);
- if (child == null) {
- // this is the first node to be inserted on this branching key.
- children.put(node.key[offset], node);
- } else {
- try {
- // there is an existing child node branching on this branching key.
- child.insert(node, offset);
- } catch (InsertAbove k) {
- // as every child node shares a common lcp, any child will suffice when preparing the new
- // lcp/offset of the new node. The least node is only required as the new parent's least node
- // will be the smallest of the inserted node and this node's least node.
- children.put(node.key[offset], new TrieBranch(child, node));
- }
- }
- }
- }
- }
-
- protected long lookup(byte[] key, int parentLcd) throws NotFound {
- if (!regionMatches(least.key, parentLcd, key, parentLcd, offset - parentLcd)) {
- throw new NotFound();
- } else {
- // new node matches the lcp of this node.
- TrieNode child;
- if (key.length == offset) {
- // new node is expected to terminate here.
- if (term != null) {
- return term.value;
- } else {
- throw new NotFound();
- }
- } else {
- // new node is expected to terminate in one of this nodes children.
- child = children.get(key[offset]);
- if (child != null) {
- return child.lookup(key, offset);
- } else {
- throw new NotFound();
- }
- }
- }
- }
-
- public String toString() {
- return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and least[" + least + "]";
- }
- }
-
- private static class TrieLeaf extends TrieNode {
- final byte[] key;
- final long value;
-
- public TrieLeaf(byte[] key, long value) {
- super();
- this.key = new byte[key.length];
- System.arraycopy(key, 0, this.key, 0, key.length);
- this.value = value;
- this.least = this;
- }
-
- protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
- if (key.length != node.key.length) {
- throw cont;
- } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
- throw cont;
- } else if (value == node.value) {
- return; // Duplicate key/value pair.
- } else {
- throw new IllegalArgumentException("Attempt to insert multiple values for same key");
- }
- }
-
- protected long lookup(byte[] key, int parentLcd) {
- assert Arrays.equals(this.key, key);
- return value;
- }
-
- public String toString() {
- return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
- }
- }
-}
Deleted: projects/xa2/object-pool/src/MemoryTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/MemoryTrieTest.java 2008-04-30 08:59:06 UTC (rev 871)
+++ projects/xa2/object-pool/src/MemoryTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -1,283 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-/**
- * Basic tests of the MemoryTrie.
- */
-public class MemoryTrieTest {
- public static void main(String[] args) throws Exception {
- testWithFile(new File("../scratch/propernames.uniq"));
- testWithFile(new File("../scratch/connectives.uniq"));
- testWithFile(new File("../scratch/web2a.uniq"));
- testWithFile(new File("../scratch/web2.uniq"));
- testWithRandomFile(new File("/dev/urandom"));
-// testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
- public static void testWithFile(File file) throws Exception {
- Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting lines from " + file);
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- while (name != null) {
- namesMap.put(name.getBytes(), n);
- namesTrie.insert(name.getBytes(), n);
- name = names.readLine();
- n++;
- }
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- System.out.println("Checking lines from " + file);
- long _startLookup = System.currentTimeMillis();
- for (byte[] key : namesMap.keySet()) {
- if (namesTrie.lookup(key) != namesMap.get(key)) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public boolean equals(Object o) {
-// System.err.print("Bytes::equals() # ");
- if (o instanceof Bytes) {
-// System.err.println("Comparing " + Arrays.hashCode(bytes) + " with " + Arrays.hashCode(((Bytes)o).bytes));
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
-// System.err.println("Bytes # Calculated hashcode for: " + hc);
- return hc;
- }
- }
-
- public static void testWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < 5000) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
- }
-
- for (int i = 0; i < 10; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.println(" " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
- System.out.println(" " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 100; i++) {
- if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
- }
-
- for (int k = 0; k < 10; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
- }
-
- for (int ii = 0; ii < 10; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateSS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.println(" " + nLL + " very long entries ave: " + (_avlLL / nLL) + " in: " + _aggregateLL + "ms");
- System.out.println(" " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
- System.out.println(" " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
- System.out.println(" " + nSS + " very short entries ave: " + (_avlSS / nSS) + " in: " + _aggregateSS + "ms");
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
Deleted: projects/xa2/object-pool/src/OnesTable.dat
===================================================================
(Binary files differ)
Copied: projects/xa2/object-pool/src/trie/ByteMap.java (from rev 868, projects/xa2/object-pool/src/ByteMap.java)
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMap.java (rev 0)
+++ projects/xa2/object-pool/src/trie/ByteMap.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,172 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+import static gen.OnesTable.*;
+
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+
+/**
+ * Represents a map from a byte to data.
+ * ie. a compressed 256 element array.
+ */
+public class ByteMap<T> implements Iterable<T> {
+ short high;
+ short[] low;
+ T[] data;
+
+ @SuppressWarnings("unchecked")
+ public ByteMap() {
+ high = 0;
+ low = new short[0];
+ data = (T[])new Object[0];
+ }
+
+ @SuppressWarnings("unchecked")
+ public ByteMap(ByteBuffer index, Map<Short, T> nodeMap) {
+ high = index.getShort();
+ low = new short[ones(high)];
+ short dataCount = 0;
+ for (int i = 0; i < low.length; i++) {
+ low[i] = index.getShort();
+ dataCount += ones(low[i]);
+ }
+ data = (T[])new Object[dataCount];
+ for (int i = 0; i < data.length; i++) {
+ short t = index.getShort();
+ data[i] = nodeMap.get(t);
+ }
+ }
+
+ /**
+ * @param datum Value to be associated with key. Note: Must not be null.
+ * @return the old value associated with key, or null if this is a new mapping.
+ */
+ @SuppressWarnings("unchecked")
+ public T insert(byte key, T datum) {
+ if (datum == null) {
+ throw new IllegalArgumentException("Attempted to insert null into ByteMap");
+ }
+
+ int h = hiNibbleAsBitInShort(key);
+ if ((high & h) != 0) {
+ // We already have a low entry for this hi-nibble.
+ int lowOffset = ones(HIGH, high, key);
+ int dataOffset = 0;
+ for (int i = 0; i < lowOffset; i++) {
+ dataOffset += ones(low[i]);
+ }
+ dataOffset += ones(LOW, low[lowOffset], key);
+
+ int l = lowNibbleAsBitInShort(key);
+ if ((low[lowOffset] & l) != 0) {
+ // We already have a data entry for this byte.
+ T t = data[dataOffset];
+ data[dataOffset] = datum;
+
+ return t;
+ } else {
+ // We need to insert a data entry for this byte.
+ // Extend data array.
+ T[] newData = (T[])new Object[data.length + 1];
+ System.arraycopy(data, 0, newData, 0, dataOffset);
+ System.arraycopy(data, dataOffset, newData, dataOffset + 1, data.length - dataOffset);
+ data = newData;
+ // Set new entry in data array; indicate it in low entry.
+ low[lowOffset] = (short)(low[lowOffset] | l);
+ data[dataOffset] = datum;
+
+ return null;
+ }
+ } else {
+ // We need to insert a new low entry for this hi-nibble.
+ short[] newLow = new short[low.length + 1];
+ int lowOffset = ones(HIGH, high, key);
+ System.arraycopy(low, 0, newLow, 0, lowOffset);
+ System.arraycopy(low, lowOffset, newLow, lowOffset + 1, low.length - lowOffset);
+ high = (short)(high | h);
+ low = newLow;
+
+ return insert(key, datum);
+ }
+ }
+
+ /**
+ * Returns the forwarding pointer corresponding to the key in this trie node.
+ * Note: The result is a *signed* short. +ve pointers correspond to references to Trie-Nodes; -ve
+ * pointers correspond to references to child pointers.
+ * @return Value corresponding to key; or null if not found.
+ */
+ public T lookup(byte key) {
+ if ((high & hiNibbleAsBitInShort(key)) == 0) {
+ // We didn't find the high nibble in this map.
+ return null;
+ }
+
+ // the low-nibble index is the ones left of the key's high-nibble bit in high.
+ int lowOffset = ones(HIGH, high, key);
+
+ // the ptrs index is the sum of the ones in the low-nibble array prior to low-nibble index + the ones
+ // left of the key's low-nibble bit in low[lowOffset].
+ if (lowOffset >= low.length) {
+ return null;
+ }
+ if ((low[lowOffset] & lowNibbleAsBitInShort(key)) == 0) {
+ // The high nibble matched, but the low nibble didn't.
+ return null;
+ }
+
+ int dataOffset = 0;
+ for (int i = 0; i < lowOffset; i++) {
+ dataOffset += ones(low[i]);
+ }
+ dataOffset += ones(LOW, low[lowOffset], key);
+
+ if (dataOffset >= data.length) {
+ return null;
+ } else {
+ // the resulting index is a direct index into the forwarding-pointer array.
+ return data[dataOffset];
+ }
+ }
+
+ public Iterator<T> iterator() {
+ return new Iterator<T>() {
+ private int index = 0;
+ public boolean hasNext() { return index < data.length; }
+ public T next() {
+ if (index < data.length) {
+ return data[index++];
+ } else {
+ throw new NoSuchElementException("Data Iterator in ByteMap");
+ }
+ }
+ public void remove() { throw new UnsupportedOperationException("Data Iterator in ByteMap"); }
+ };
+ }
+
+ public int size() {
+ return data.length;
+ }
+
+ public int memSize() {
+ return 2 + 2 * low.length + 2 * data.length;
+ }
+
+ public int headerSize() {
+ return 2 + 2 * low.length;
+ }
+
+ public void writeHeader(ByteBuffer bb) {
+ bb.putShort(high);
+ for (int i = 0; i < low.length; i++) {
+ bb.putShort(low[i]);
+ }
+ }
+}
Copied: projects/xa2/object-pool/src/trie/ByteMapTest.java (from rev 868, projects/xa2/object-pool/src/ByteMapTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMapTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/ByteMapTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,182 @@
+
+
+/**
+ * Represents a map from a byte to data.
+ * ie. a compressed 256 element array.
+ */
+public class ByteMapTest {
+ public static void main(String[] args) throws Exception {
+ testEmpty();
+ testSequentialImmediate();
+ testSequential();
+ testAlternateImmediate();
+ testAlternate();
+ testAlternateLowNibbleImmediate();
+ testLookupNonExistentLow();
+ testLookupMissedHighMatchedLow();
+ testLookupMatchedHighMissedLow();
+ }
+
+ public static void testEmpty() throws Exception {
+ System.out.println("running testEmpty...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 0x00000100; i++) {
+ assertNull("Found result for i = " + i + " in empty map", byteMap.lookup((byte)i));
+ }
+ System.out.println("testEmpty Successful");
+ }
+
+ public static void testSequentialImmediate() throws Exception {
+ System.out.println("running testSequentialImmediate...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 0x00000100; i++) {
+ assertNull("Found double entry for i = " + i + " in sequential map", byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " in sequential map: Received " + l, new Long(i), l);
+ }
+ System.out.println("testSequentialImmediate Successful");
+ }
+
+ public static void testSequential() throws Exception {
+ System.out.println("running testSequential...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 0x00000100; i++) {
+ assertNull("Found double entry for i = " + i + " in sequential map", byteMap.insert((byte)i, new Long(i)));
+ }
+
+ for (int i = 0; i < 0x00000100; i++) {
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " in sequential map: Received " + l, new Long(i), l);
+ }
+ System.out.println("testSequential Successful");
+ }
+
+ public static void testAlternateImmediate() throws Exception {
+ System.out.println("running testAlternateImmediate...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 128; i++) {
+ assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
+
+ int ii = 255 - i;
+ assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
+ Long ll = byteMap.lookup((byte)ii);
+ assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
+ }
+ System.out.println("testAlternateImmediate Successful");
+ }
+
+ public static void testAlternate() throws Exception {
+ System.out.println("running testAlternateImmediate...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 128; i++) {
+ assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
+
+ int ii = 255 - i;
+ assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
+ }
+
+ for (int i = 0; i < 128; i++) {
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
+
+ int ii = 255 - i;
+ Long ll = byteMap.lookup((byte)ii);
+ assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
+ }
+
+ System.out.println("testAlternateImmediate Successful");
+ }
+
+ public static void testAlternateLowNibbleImmediate() throws Exception {
+ System.out.println("running testAlternateLowNibbleImmediate...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ for (int i = 0; i < 64; i++) {
+ assertNull("Found double entry for i = " + i + " in alternate map", byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " in alternate map: Received " + l, new Long(i), l);
+
+ int ii = 127 - i;
+ assertNull("Found double entry for i' = " + ii + " in alternate map", byteMap.insert((byte)ii, new Long(ii)));
+ Long ll = byteMap.lookup((byte)ii);
+ assertEquals("Found mismatching entry for i' = " + ii + " in alternate map: Received " + ll, new Long(ii), ll);
+ }
+ System.out.println("testAlternateLowNibbleImmediate Successful");
+ }
+
+ public static void testLookupNonExistentLow() throws Exception {
+ System.out.println("running testLookupNonExistentLow...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ int i = 0x061;
+ assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
+
+ i = 0x067;
+ assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
+
+ System.out.println("testLookupNonExistentLow Successful");
+ }
+
+ public static void testLookupMissedHighMatchedLow() throws Exception {
+ System.out.println("running testLookupMissedHighMatchedLow...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ int i = 0x017;
+ assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
+
+ i = 0x007;
+ assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
+
+ System.out.println("testLookupMissedHighMatchedLow Successful");
+ }
+
+ public static void testLookupMatchedHighMissedLow() throws Exception {
+ System.out.println("running testLookupMatchedHighMissedLow...");
+ ByteMap<Long> byteMap = new ByteMap<Long>();
+
+ int i = 0x007;
+ assertNull("Found double entry for i = " + i, byteMap.insert((byte)i, new Long(i)));
+ Long l = byteMap.lookup((byte)i);
+ assertEquals("Found mismatching entry for i = " + i + " Received " + l, new Long(i), l);
+
+ i = 0x006;
+ assertNull("Found entry for uninserted key i = " + i, byteMap.lookup((byte)i));
+
+ System.out.println("testLookupMatchedHighMissedLow Successful");
+ }
+
+ public static void assertNull(String str, Object obj) throws Exception {
+ if (obj != null) {
+ throw new IllegalStateException(str);
+ }
+ }
+
+ public static void assertNotNull(String str, Object obj) throws Exception {
+ if (obj == null) {
+ throw new IllegalStateException(str);
+ }
+ }
+
+ public static void assertTrue(String str, boolean b) throws Exception {
+ if (!b) {
+ throw new IllegalStateException(str);
+ }
+ }
+
+ public static void assertEquals(String str, Object lhs, Object rhs) throws Exception {
+ if (lhs == null && rhs == null) return;
+ if (lhs == null || rhs == null) throw new IllegalStateException(str);
+ if (!lhs.equals(rhs)) throw new IllegalStateException(str);
+ }
+}
Copied: projects/xa2/object-pool/src/trie/CompBlockTrie.java (from rev 868, projects/xa2/object-pool/src/CompBlockTrie.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrie.java (rev 0)
+++ projects/xa2/object-pool/src/trie/CompBlockTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,342 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 22nd April 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Extends CompMemTrie to provide IO functionality.
+ *
+ * Guarantees Trie will fit within a single block, refuses to accept insert() if it would make the
+ * serialized size of the node greater than the blocksize.
+ */
+public class CompBlockTrie extends CompMemTrie {
+ @SuppressWarnings("serial")
+ public static class InsufficientSpace extends Exception
+ { public InsufficientSpace(String s) { super(s); } };
+
+ static final byte TYPE_BRANCH_TERM = 0x01; // Indicates a trie branch with a termination entry..
+ static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
+ static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
+ static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
+ public final static int MAGIC = 0x00020001;
+ public final static int INVERT_MAGIC = 0x01000200;
+
+ // Calculated as the worst case size per entry.
+ // Assumes a full leaf and branch per entry.
+ // Block: 2-byte type-header
+ // 4-byte offset
+ // 2-byte ByteMap-High
+ // 4-byte ByteMap-Lows (2-entries).
+ // 4-byte Children Ptrs.
+ // Leaf: 2-byte type-header
+ // 8-byte offset.
+ // 26-bytes Total.
+ // Best case of course is 10-bytes leaf + 2-bytes children in branch.
+ private static short WORST_CASE_ENTRY_SIZE = 26;
+//private static short BEST_CASE_ENTRY_SIZE = 12;
+
+ /** Worst-case space left in trie measured in entries */
+ private short space;
+
+ /**
+ * The blocksize for this Block-Trie.
+ * Currently the maximum block size supported is 32K, which is worst-case:
+ * 1260 entries covered by a single I0 index in 32KB.
+ * 1.59 million entries by a single I1 index in 40MB.
+ * 2.00 billion entries by a single I2 index in 49GB.
+ * 2.52 trillion entries by a single I3 index in 60TB.
+ */
+ private int blockSize;
+
+ public CompBlockTrie(int blockSize) {
+ super();
+ assert blockSize > 1024;
+ assert blockSize <= 32*1024;
+ this.blockSize = blockSize;
+ this.space = (short)((blockSize - 8) / WORST_CASE_ENTRY_SIZE); // -8 leaves room for header info.
+ }
+
+ public CompBlockTrie(ByteBuffer index, FileChannel data) throws IOException {
+ super();
+ index.rewind(); // Note sure if I should doing this here or delegating to the caller.
+ int magic = index.getInt();
+ if (magic != MAGIC) {
+ if (magic == INVERT_MAGIC) {
+ index.order(index.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+ } else {
+ throw new IllegalArgumentException("Bad magic in index buffer: " + magic + ", MAGIC=" + MAGIC);
+ }
+ }
+
+ // I should be able to replace this with a stack.
+ // The child->parent relationship is implicit in the ordering of the nodes in the file.
+ // The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
+ // If treated as a stack once we have read the root the stack should contain only a single element - the
+ // root node.
+ // For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
+ HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
+
+ short rootLocation = -1;
+ while (rootLocation == -1) {
+ short location = (short)index.position();
+ byte type = index.get();
+ switch (type) {
+ case TYPE_BRANCH_TERM:
+ nodeMap.put(location, readTrieBranch(index, true, nodeMap));
+ break;
+ case TYPE_BRANCH_NOTERM:
+ nodeMap.put(location, readTrieBranch(index, false, nodeMap));
+ break;
+ case TYPE_LEAF:
+ nodeMap.put(location, readTrieLeaf(index, data));
+ break;
+ case TYPE_ROOT_LOC:
+ index.get();
+ rootLocation = index.getShort();
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid trie-node");
+ }
+ }
+
+ root = nodeMap.get(rootLocation);
+
+ this.space = 0; // If this gets modified it will be recalculated automaticly then.
+ }
+
+ public boolean insert(byte[] key, long value) {
+ // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
+ // trie-block, but if we haven't we should recalculate the space available.
+ if (space == 0) {
+ int size = totalIndexSize(root);
+ space = (short)((blockSize - size - 8) / WORST_CASE_ENTRY_SIZE);
+ if (space < 0) {
+ throw new IllegalStateException("Overfilled CompBlockTrie");
+ } else if (space == 0) {
+ return false;
+ }
+ }
+
+ super.insert(key, value);
+ space--;
+
+ return true;
+ }
+
+ public void write(ByteBuffer index, FileChannel data) throws InsufficientSpace, IOException {
+ if (index.remaining() < blockSize) {
+ throw new InsufficientSpace("Insufficient space remaining in buffer to write block");
+ }
+
+ // Temporarally set the limit to blocksize.
+ int limit = index.limit();
+ index.limit(index.position() + blockSize);
+
+ if (root == null) {
+ throw new IllegalStateException("Attempt to write empty trie");
+ }
+
+ int indexreq = (root == null) ? 8 : totalIndexSize(root) + 4 + 4; // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
+ if (indexreq > index.remaining()) {
+ System.err.printf("Index-Req:%d ; remaining:%d ; capacity:%d ; limit:%d ; position:%d\n", indexreq,
+ index.remaining(), index.capacity(), index.limit(), index.position());
+ throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
+ }
+ if (indexreq > 0x00010000) {
+ throw new InsufficientSpace("Attempt to write trie index larger than 64K");
+ }
+
+ HashMap<TrieNode, Short> locationMap = new HashMap<TrieNode, Short>();
+
+ index.putInt(MAGIC);
+ if (root != null) {
+ writeTrieNode(root, index, data, locationMap);
+ }
+ index.put(TYPE_ROOT_LOC);
+ index.put((byte)0xFF);
+ if (root != null) {
+ index.putShort(locationMap.get(root));
+ } else {
+ index.putShort((short)0x0000);
+ }
+
+ // Set the position to the block size to ensure whole blocks are written.
+ index.position(index.limit());
+ // Reset the limit to its initial value.
+ index.limit(limit);
+ }
+
+ private static Map<Class, TrieNodeWriter> writerMap = new HashMap<Class, TrieNodeWriter>();
+ static {
+ writerMap.put(TrieBranch.class, new TrieBranchWriter());
+ writerMap.put(TrieLeaf.class, new TrieLeafWriter());
+ }
+
+ protected static void writeTrieNode(TrieNode node, ByteBuffer index, FileChannel data,
+ Map<TrieNode, Short> locationMap) throws IOException {
+ writerMap.get(node.getClass()).write(node, index, data, locationMap);
+ }
+
+ private static interface TrieNodeWriter {
+ /**
+ * This method *must* update location with the position in the index buffer it was written to.
+ * This allows parents to obtain a physical reference to this node in their write methods.
+ * The index buffer is the target of the Trie index itself.
+ * The data buffer is the target of the byte[]/value pairs stored in the leaves. We want to keep these
+ * separate for caching purposes.
+ */
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException;
+ }
+
+ private static class TrieBranchWriter implements TrieNodeWriter {
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
+ TrieBranch branch = (TrieBranch)node;
+ if (branch.term != null) {
+ writeTrieNode(branch.term, index, data, locationMap);
+ }
+ for (TrieNode child : branch.children) {
+ writeTrieNode(child, index, data, locationMap);
+ }
+
+ locationMap.put(branch, (short)index.position());
+
+ index.put((branch.term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
+ index.put((byte)0xFF); // Padding to keep things short-aligned.
+ index.putInt(branch.offset);
+ if (branch.term != null) {
+ index.putShort(locationMap.get(branch.term));
+ }
+ branch.children.writeHeader(index);
+ for (TrieNode child : branch.children) {
+ index.putShort(locationMap.get(child));
+ }
+ }
+ }
+
+ private static class TrieLeafWriter implements TrieNodeWriter {
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
+ TrieLeaf leaf = (TrieLeaf)node;
+
+ ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
+ long keyLocation = data.position();
+ dataBuf.putLong(leaf.value);
+ dataBuf.putInt(leaf.key.length);
+ dataBuf.put(leaf.key);
+ data.write((ByteBuffer)dataBuf.flip());
+
+ locationMap.put(leaf, (short)index.position());
+ index.put(TYPE_LEAF);
+ index.put((byte)0xFF); // Pad to 16-bit aligned.
+ index.putLong(keyLocation);
+ }
+ }
+
+ private TrieBranch readTrieBranch(ByteBuffer index, boolean hasTerm, Map<Short, TrieNode> nodeMap) throws IOException {
+ TrieBranch branch = new TrieBranch();
+
+ index.get(); // skip padding.
+ branch.offset = index.getInt();
+ if (hasTerm) {
+ branch.term = (TrieLeaf)nodeMap.get(index.getShort());
+ }
+ branch.children = new ByteMap<TrieNode>(index, nodeMap);
+ if (branch.term == null) {
+ // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
+ // generics which triggers the obvious ClassCastException. By extracting explicitly to an Object[]
+ // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
+ // insert the erroneous cast.
+ Object[] d = branch.children.data;
+ branch.aLeaf = ((TrieNode)(d[0])).aLeaf;
+ } else {
+ branch.aLeaf = branch.term;
+ }
+
+ return branch;
+ }
+
+ private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
+ index.get();
+ long keyLocation = index.getLong();
+ data.position(keyLocation);
+ // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
+ ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
+ data.read(bb);
+ bb.flip();
+ long value = bb.getLong();
+ int length = bb.getInt();
+ byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
+ data.read(ByteBuffer.wrap(key));
+
+ return new TrieLeaf(key, value, false);
+ }
+
+ private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
+ static {
+ sizerMap.put(TrieBranch.class, new TrieBranchSizer());
+ sizerMap.put(TrieLeaf.class, new TrieLeafSizer());
+ }
+
+ protected static int totalIndexSize(TrieNode node) {
+ return sizerMap.get(node.getClass()).indexSize(node);
+ }
+
+ protected static int totalDataSize(TrieNode node) {
+ return sizerMap.get(node.getClass()).dataSize(node);
+ }
+
+ private static interface TrieNodeSizer {
+ public int indexSize(TrieNode node);
+ public int dataSize(TrieNode node);
+ }
+
+ private static class TrieBranchSizer implements TrieNodeSizer {
+ protected int memSize(TrieBranch branch) {
+ return 4 + 1 + 1 + branch.children.memSize() + (branch.term == null ? 0 : 2);
+ }
+
+ public int indexSize(TrieNode node) {
+ TrieBranch branch = (TrieBranch)node;
+ int total = memSize(branch);
+ if (branch.term != null) {
+ total += totalIndexSize(branch.term);
+ }
+ for (TrieNode child : branch.children) {
+ total += totalIndexSize(child);
+ }
+
+ return total;
+ }
+
+ public int dataSize(TrieNode node) {
+ TrieBranch branch = (TrieBranch)node;
+ int total = 0;
+ if (branch.term != null) {
+ total += totalDataSize(branch.term);
+ }
+ for (TrieNode child : branch.children) {
+ total += totalDataSize(child);
+ }
+
+ return total;
+ }
+ }
+
+ private static class TrieLeafSizer implements TrieNodeSizer {
+ public int indexSize(TrieNode node) {
+ return 1 + 1 + 8;
+ }
+
+ public int dataSize(TrieNode node) {
+ TrieLeaf leaf = (TrieLeaf)node;
+ return 8 + 4 + leaf.key.length;
+ }
+ }
+}
Copied: projects/xa2/object-pool/src/trie/CompBlockTrieTest.java (from rev 868, projects/xa2/object-pool/src/CompBlockTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrieTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/CompBlockTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,462 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the CompBlockTrie.
+ */
+public class CompBlockTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ private static final int BLOCK_SIZE = 32 * 1024;
+ public static void testWithLimitedFile(File file, int limit) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+
+ FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+ FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
+ ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+
+ ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
+ CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
+
+ System.out.println("Inserting " + limit + " lines from " + file);
+
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0xDEADBEEF00000000L;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ for (int i = 0; i < limit; i++) {
+ // The use of the double loop means we pay the cost of setting up the exception handler only once per
+ // block, not once per entry.
+ if (name == null) {
+ break;
+ }
+ if (trie.insert(name.getBytes(), n)) {
+ // Note: exception thrown here if trie is full. So name will remain valid when we reenter loop with
+ // a new trie.
+ namesMap.put(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ } else {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ }
+ }
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+
+ indexFile.force(true);
+ dataFile.force(true);
+ indexFile.close();
+ dataFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+ dataFile = new FileInputStream("tmp/sp_dat").getChannel();
+ indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+ ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
+
+ long _startRead = System.currentTimeMillis();
+
+ while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
+ indexBuffer.flip();
+ CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
+ readTries.add(ctrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ System.out.println("Checking lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+
+ boolean[] valid = new boolean[1];
+
+ for (byte[] key : namesMap.keySet()) {
+ boolean found = false;
+ for (CompBlockTrie t : tries) {
+ long value = t.lookup(key, valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ found = false;
+ for (CompBlockTrie t : readTries) {
+ long value = t.lookup(key, valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Read-Trie doesn't match Map");
+ }
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+
+ public String toString() {
+ return Arrays.toString(bytes);
+ }
+ }
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompBlockTrie namesTrie = new CompBlockTrie(32*1024);
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+ FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
+ ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+
+ ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
+ CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+
+ if (!trie.insert(key, n)) {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ trie.insert(key, n);
+ }
+
+ n++;
+
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+
+ if (!trie.insert(key, n)) {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ trie.insert(key, n);
+ }
+ n++;
+
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+ }
+ }
+ }
+
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+
+ indexFile.force(true);
+ dataFile.force(true);
+ indexFile.close();
+ dataFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+ dataFile = new FileInputStream("tmp/sp_dat").getChannel();
+ indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+ ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
+
+ long _startRead = System.currentTimeMillis();
+
+ while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
+ indexBuffer.flip();
+ CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
+ readTries.add(ctrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+
+ boolean[] valid = new boolean[1];
+
+ for (Bytes keyBytes : namesMap.keySet()) {
+ int i = 0;
+ boolean found = false;
+ for (CompBlockTrie t : readTries) {
+ long value = t.lookup(keyBytes.getBytes(), valid);
+ if (valid[0] && value == namesMap.get(keyBytes)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i
+ + " key:" + keyBytes + " value: " + namesMap.get(keyBytes));
+ }
+ i++;
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ int i = 0;
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
Copied: projects/xa2/object-pool/src/trie/CompMemTrie.java (from rev 868, projects/xa2/object-pool/src/CompMemTrie.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrie.java (rev 0)
+++ projects/xa2/object-pool/src/trie/CompMemTrie.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,255 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 15th April 2008
+ */
+
+import java.util.Arrays;
+
+/**
+ * Implements an in-memory trie - uses ByteMaps to implement trie nodes.
+ */
+public class CompMemTrie {
+ @SuppressWarnings("serial")
+ public static class NotFound extends Exception {};
+ private static NotFound notfound = new NotFound();
+
+ protected TrieNode root;
+ public CompMemTrie() {
+ this.root = null;
+ }
+
+ public boolean insert(byte[] key, long value) {
+ if (root == null) {
+ root = new TrieLeaf(key, value);
+ } else {
+ if (!root.insert(key, value)) {
+ root = new TrieBranch(root, key, value);
+ }
+ }
+
+ return true;
+ }
+
+ public long lookup(byte[] key) throws NotFound {
+ boolean[] valid = new boolean[1];
+ long result = lookup(key, valid);
+ if (valid[0]) {
+ return result;
+ } else {
+ throw notfound;
+ }
+ }
+
+
+ public long lookup(byte[] key, boolean[] valid) {
+ if (root == null) {
+ valid[0] = false;
+ return 0;
+ } else {
+ return root.lookup(key, valid);
+ }
+ }
+
+ protected abstract static class TrieNode {
+ protected TrieLeaf aLeaf;
+
+ /**
+ * @return false if we need to insert key above this node.
+ */
+ public boolean insert(byte[] key, long value) {
+ return insert(new TrieLeaf(key, value), 0);
+ }
+
+ public long lookup(byte[] key, boolean[] valid) {
+ return lookup(key, 0, valid);
+ }
+
+ protected abstract boolean insert(TrieLeaf node, int parentLcd);
+ protected abstract long lookup(byte[] key, int parentLcd, boolean[] valid);
+
+ protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+ if (lhsOff < 0 || rhsOff < 0) {
+ return false;
+ }
+ if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
+ }
+
+ return true;
+ }
+ }
+
+ protected static class TrieBranch extends TrieNode {
+ protected int offset;
+ protected ByteMap<TrieNode> children;
+ protected TrieLeaf term;
+
+ protected TrieBranch() {}
+
+ public TrieBranch(byte[] key, long value) {
+ this.children = new ByteMap<TrieNode>();
+ this.term = new TrieLeaf(key, value);
+ this.offset = term.key.length;
+ this.aLeaf = this.term;
+ }
+
+ public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
+ this(oldRoot, new TrieLeaf(key, value));
+ }
+
+ private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
+ super();
+ assert oldNode != null;
+ assert newNode != null;
+
+ offset = 0;
+ children = new ByteMap<TrieNode>();
+ term = null;
+
+ // as every child node shares a common lcp, any child will suffice when preparing the new
+ // lcp/offset of the new node.
+ aLeaf = newNode;
+
+ byte[] lhs = oldNode.aLeaf.key;
+ byte[] rhs = newNode.key;
+
+ int i = 0;
+ while (i < lhs.length && i < rhs.length) {
+ if (lhs[i] != rhs[i]) {
+ offset = i;
+ children.insert(lhs[i], oldNode);
+ children.insert(rhs[i], newNode);
+
+ return; // Escape here.
+ }
+ i++;
+ }
+
+ if (i < lhs.length) {
+ offset = i;
+ children.insert(lhs[i], oldNode);
+ term = newNode;
+ } else if (i < rhs.length) {
+ if (oldNode instanceof TrieLeaf) {
+ offset = i;
+ children.insert(rhs[i], newNode);
+ term = (TrieLeaf)oldNode;
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
+ }
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with equal children");
+ }
+ }
+
+ protected boolean insert(TrieLeaf node, int parentLcp) {
+ if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+ return false;
+ } else {
+ // new node matches the lcp of this node.
+ if (node.key.length == offset) {
+ // new node is expected to terminate here.
+ if (term == null) {
+ term = node;
+ } else {
+ return term.insert(node, offset);
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ TrieNode child = children.lookup(node.key[offset]);
+ if (child == null) {
+ // this is the first node to be inserted on this branching key.
+ children.insert(node.key[offset], node);
+ } else {
+ // there is an existing child node branching on this branching key.
+ if (!child.insert(node, offset)) {
+ children.insert(node.key[offset], new TrieBranch(child, node));
+ }
+ }
+ }
+
+ return true;
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
+ if (!regionMatches(aLeaf.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+ //FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
+ valid[0] = false;
+ return 0;
+ } else {
+ // new node matches the lcp of this node.
+ TrieNode child;
+ if (key.length == offset) {
+ // new node is expected to terminate here.
+ if (term != null) {
+ valid[0] = true;
+ return term.value;
+ } else {
+ valid[0] = false;
+ return 0;
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ child = children.lookup(key[offset]);
+ if (child != null) {
+ return child.lookup(key, offset, valid);
+ } else {
+ valid[0] = false;
+ return 0;
+ }
+ }
+ }
+ }
+
+ public String toString() {
+ return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and aLeaf[" + aLeaf + "]";
+ }
+ }
+
+ protected static class TrieLeaf extends TrieNode {
+ final byte[] key;
+ final long value;
+
+ protected TrieLeaf(byte[] key, long value, boolean copyKey) {
+ if (copyKey) {
+ this.key = new byte[key.length];
+ System.arraycopy(key, 0, this.key, 0, key.length);
+ } else {
+ this.key = key;
+ }
+ this.value = value;
+ this.aLeaf = this;
+ }
+
+ public TrieLeaf(byte[] key, long value) {
+ this(key, value, true);
+ }
+
+ protected boolean insert(TrieLeaf node, int parentLcp) {
+ if (key.length != node.key.length) {
+ return false;
+ } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+ return false;
+ } else if (value == node.value) {
+ return true; // Duplicate key/value pair.
+ } else {
+ throw new IllegalArgumentException("Attempt to insert multiple values for same key");
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
+ assert Arrays.equals(this.key, key);
+ valid[0] = true;
+ return value;
+ }
+
+ public String toString() {
+ return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
+ }
+ }
+}
Copied: projects/xa2/object-pool/src/trie/CompMemTrieTest.java (from rev 868, projects/xa2/object-pool/src/CompMemTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrieTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/CompMemTrieTest.java 2008-05-01 04:45:55 UTC (rev 872)
@@ -0,0 +1,409 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the CompMemTrie.
+ */
+public class CompMemTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithFile(new File("../scratch/propernames.uniq"));
+ testWithFile(new File("../scratch/connectives.uniq"));
+ testWithFile(new File("../scratch/web2a.uniq"));
+ testWithFile(new File("../scratch/web2.uniq"));
+ testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+ testWithRandomFile(new File("/dev/urandom"));
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ public static void testWithFile(File file) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ while (name != null) {
+ namesMap.put(name.getBytes(), n);
+ namesTrie.insert(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ }
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ System.out.println("Checking " + n + " lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+ for (byte[] key : namesMap.keySet()) {
+ if (namesTrie.lookup(key) != namesMap.get(key)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+ }
+
+ public static void testWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < 5000) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < 10; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ int i = 0;
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ int i = 0;
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ for (Bytes k : namesMap.keySet()) {
+ int i = 0;
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
Copied: projects/xa2/object-pool/src/trie/OnesTable.dat (from rev 868, projects/xa2/object-pool/src/OnesTable.dat)
===================================================================
(Binary files differ)
More information about the Mulgara-svn
mailing list