[Mulgara-svn] r899 - in projects/xa2/object-pool/src: . data gen trie
andrae at mulgara.org
andrae at mulgara.org
Tue May 6 06:39:35 UTC 2008
Author: andrae
Date: 2008-05-05 23:39:34 -0700 (Mon, 05 May 2008)
New Revision: 899
Added:
projects/xa2/object-pool/src/data/
projects/xa2/object-pool/src/data/DataEntry.java
projects/xa2/object-pool/src/data/DataFile.java
Removed:
projects/xa2/object-pool/src/gen/OnesTable.java
Modified:
projects/xa2/object-pool/src/trie/DiskTrie.java
projects/xa2/object-pool/src/trie/MemoryTrie.java
Log:
Now we start introducing caching to the entries within the object pool itself.
Specifically we start handling the possibility that an entry may require multiple blocks and that a block may
contain multiple entries.
This is compounded by the fact that a compressed trie will often only need to access a single byte from a
given entry, so we should only load the head block and the block containing that byte if we can.
Added: projects/xa2/object-pool/src/data/DataEntry.java
===================================================================
--- projects/xa2/object-pool/src/data/DataEntry.java (rev 0)
+++ projects/xa2/object-pool/src/data/DataEntry.java 2008-05-06 06:39:34 UTC (rev 899)
@@ -0,0 +1,76 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 6th May 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Abstracts the concept of a data file entry.
+ */
+public abstract class DataEntry {
+ private long value;
+
+ public abstract int size();
+ public abstract int totalSize() {
+ return 8 + 4 + size(); // VALUE + LENGTH + DATA
+ }
+ public abstract void write(FileChannel chan) throws IOException;
+
+ protected class DiskBackedDataEntry extends DataEntry {
+ private int length;
+ private ByteBuffer entry;
+ private ByteBuffer data;
+
+ // FIXME: Need to replace this with a background populated ConcurrentLinkedQueue.
+ private Block[] blocks;
+
+ DiskBackedDataEntry(FileHandle handle, long position) {
+ Block block = handle.readBlock(position);
+ this.entry = block.offset(position & (BLOCK_SIZE - 1)); // block size is a power of 2 therefore mask is size - 1.
+
+ this.value = entry.getLong();
+ this.length = entry.getInt();
+
+ if (length <= entry.remaining()) {
+ entry.limit(length);
+ blocks[] = new Block[] { block };
+ } else {
+ entry.limit(entry.capacity());
+ int remaining = length - entry.remaining();
+ int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
+ blocks[] = new Block[remainingBlocks];
+ blocks[0] = block;
+ }
+ }
+ }
+
+ protected class MemoryBackedDataEntry extends DataEntry {
+ private ByteBuffer data;
+
+ MemoryBackedDataEntry(long value, byte[] data) {
+ this(value, data, true);
+ }
+
+ MemoryBackedDataEntry(long value, byte[] data, boolean copyData) {
+ this.value = value;
+ if (copyData) {
+ byte[] copy = new byte[data.length];
+ System.arraycopy(data, 0, copy, 0, data.length);
+ this.data = ByteBuffer.wrap(copy);
+ } else {
+ this.data = ByteBuffer.wrap(data);
+ }
+ }
+
+ public int size() {
+ return data.capacity();
+ }
+ }
+}
Added: projects/xa2/object-pool/src/data/DataFile.java
===================================================================
--- projects/xa2/object-pool/src/data/DataFile.java (rev 0)
+++ projects/xa2/object-pool/src/data/DataFile.java 2008-05-06 06:39:34 UTC (rev 899)
@@ -0,0 +1,28 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 1st May 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Abstracts the concept of a file of sorted octet sequences.
+ */
+public class DataFile {
+ public static final int MAGIC = 0x00030002;
+ public static final int LOG2_BLOCK_SIZE = 21;
+ public static final int BLOCK_SIZE = 0x01 << LOG2_BLOCK_SIZE;
+
+ private int blockSize;
+
+ public DataFile(File file) {
+ super();
+ FileHandle handle = IOScheduler.open(file, MAGIC, LOG2_BLOCK_SIZE);
+ }
+}
Deleted: projects/xa2/object-pool/src/gen/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTable.java 2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/gen/OnesTable.java 2008-05-06 06:39:34 UTC (rev 899)
@@ -1,58 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 18th April 2008
- */
-package gen;
-
-import java.io.FileInputStream;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-public class OnesTable {
- public static final int HIGH = 0;
- public static final int LOW = 1;
-
- public static int hiNibbleAsBitInShort(byte b) {
- return 0x00008000 >> ((b & 0x000000F0) >> 4);
- }
-
- public static int lowNibbleAsBitInShort(byte b) {
- return 0x00008000 >> (b & 0x0000000F);
- }
-
- public static int ones(short index) {
- return shortOnesTable[index & 0x0000FFFF];
- }
-
- public static int ones(int nibble, short index, byte key) {
- // Conversions from signed to unsigned.
- int idx = index & 0x0000FFFF;
- int ky = key & 0x000000FF;
-
- return onesTable[idx * 512 + 2 * ky + nibble];
- }
-
- // The number of ones in a 16-bit binary number indexed by 16-bit int.
- // Note: This is a 64KB array. By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
- // of 16-bit, we could get away with as little as 127-bytes here.
- private static final byte[] shortOnesTable;
-
- // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
- // represented as a single bit in a 16-bit unsigned int.
- // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
- // operations from the initialisation below to the lookup functions above. Classic time/space tradeoff.
- private static final byte[] onesTable;
-
- static {
- try {
- FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
- shortOnesTable = new byte[64*1024];
- fc.read(ByteBuffer.wrap(shortOnesTable));
- onesTable = new byte[64*1024 * 2*256];
- fc.read(ByteBuffer.wrap(onesTable));
- } catch (Exception e) {
- throw new IllegalStateException("Failed to initialize OnesTable", e);
- }
- }
-}
Modified: projects/xa2/object-pool/src/trie/DiskTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrie.java 2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/trie/DiskTrie.java 2008-05-06 06:39:34 UTC (rev 899)
@@ -112,7 +112,7 @@
this.space = 0; // If this gets modified it will be recalculated automaticly then.
}
- public boolean insert(byte[] key, long value) {
+ public boolean insert(DataEntry entry) {
// Space was calculated on the basis of worst-case - if we were worst case we have now filled the
// trie-block, but if we haven't we should recalculate the space available.
if (space == 0) {
@@ -125,7 +125,7 @@
}
}
- super.insert(key, value);
+ super.insert(entry);
space--;
return true;
@@ -225,12 +225,8 @@
public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
TrieLeaf leaf = (TrieLeaf)node;
- ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
long keyLocation = data.position();
- dataBuf.putLong(leaf.value);
- dataBuf.putInt(leaf.key.length);
- dataBuf.put(leaf.key);
- data.write((ByteBuffer)dataBuf.flip());
+ leaf.entry.write(data);
locationMap.put(leaf, (short)index.position());
index.put(TYPE_LEAF);
@@ -262,20 +258,13 @@
return branch;
}
- private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
- index.get();
+ private TrieLeaf readTrieLeaf(ByteBuffer index, DataFile dataFile) throws IOException {
+ index.get(); // skip padding.
long keyLocation = index.getLong();
- data.position(keyLocation);
- // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
- ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
- data.read(bb);
- bb.flip();
- long value = bb.getLong();
- int length = bb.getInt();
- byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
- data.read(ByteBuffer.wrap(key));
- return new TrieLeaf(key, value, false);
+ DataEntry entry = dataFile.getEntry(keyLocation);
+
+ return new TrieLeaf(entry, false);
}
private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
@@ -336,7 +325,7 @@
public int dataSize(TrieNode node) {
TrieLeaf leaf = (TrieLeaf)node;
- return 8 + 4 + leaf.key.length;
+ return leaf.entry.totalSize();
}
}
}
Modified: projects/xa2/object-pool/src/trie/MemoryTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrie.java 2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/trie/MemoryTrie.java 2008-05-06 06:39:34 UTC (rev 899)
@@ -19,19 +19,19 @@
this.root = null;
}
- public boolean insert(byte[] key, long value) {
+ public boolean insert(DataEntry entry) {
if (root == null) {
- root = new TrieLeaf(key, value);
+ root = new TrieLeaf(entry);
} else {
- if (!root.insert(key, value)) {
- root = new TrieBranch(root, key, value);
+ if (!root.insert(entry)) {
+ root = new TrieBranch(root, entry);
}
}
return true;
}
- public long lookup(byte[] key) throws NotFound {
+ public long lookup(DataEntry key) throws NotFound {
boolean[] valid = new boolean[1];
long result = lookup(key, valid);
if (valid[0]) {
@@ -42,7 +42,7 @@
}
- public long lookup(byte[] key, boolean[] valid) {
+ public long lookup(DataEntry key, boolean[] valid) {
if (root == null) {
valid[0] = false;
return 0;
@@ -57,29 +57,28 @@
/**
* @return false if we need to insert key above this node.
*/
- public boolean insert(byte[] key, long value) {
- return insert(new TrieLeaf(key, value), 0);
+ public boolean insert(DataEntry entry) {
+ return insert(new TrieLeaf(entry), 0);
}
- public long lookup(byte[] key, boolean[] valid) {
+ public long lookup(DataEntry key, boolean[] valid) {
return lookup(key, 0, valid);
}
protected abstract boolean insert(TrieLeaf node, int parentLcd);
- protected abstract long lookup(byte[] key, int parentLcd, boolean[] valid);
+ protected abstract long lookup(DataEntry key, int parentLcd, boolean[] valid);
- protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+ protected static boolean regionMatches(DataEntry lhs, int lhsOff, DataEntry rhs, int rhsOff, int len) {
if (lhsOff < 0 || rhsOff < 0) {
return false;
}
- if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+ if (lhs.size() < lhsOff + len || rhs.size() < rhsOff + len) {
return false;
}
- for (int i = 0; i < len; i++) {
- if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
- }
- return true;
+ int cmp = lhs.slice(lhsOff, len).compareTo(rhs.slice(rhsOff, len));
+
+ return (cmp == 0);
}
}
@@ -90,10 +89,10 @@
protected TrieBranch() {}
- public TrieBranch(byte[] key, long value) {
+ public TrieBranch(DataEntry entry) {
this.children = new ByteMap<TrieNode>();
- this.term = new TrieLeaf(key, value);
- this.offset = term.key.length;
+ this.term = new TrieLeaf(entry);
+ this.offset = term.entry.size();
this.aLeaf = this.term;
}
@@ -114,29 +113,31 @@
// lcp/offset of the new node.
aLeaf = newNode;
- byte[] lhs = oldNode.aLeaf.key;
- byte[] rhs = newNode.key;
+ DataEntry lhs = oldNode.aLeaf.entry.rewind();
+ DataEntry rhs = newNode.entry.rewind();
int i = 0;
- while (i < lhs.length && i < rhs.length) {
- if (lhs[i] != rhs[i]) {
+ while (i < lhs.size() && i < rhs.size()) {
+ byte lb = lhs.get();
+ byte rb = rhs.get();
+ if (lb != rb) {
offset = i;
- children.insert(lhs[i], oldNode);
- children.insert(rhs[i], newNode);
+ children.insert(lb, oldNode);
+ children.insert(rb, newNode);
return; // Escape here.
}
i++;
}
- if (i < lhs.length) {
+ if (i < lhs.size()) {
offset = i;
- children.insert(lhs[i], oldNode);
+ children.insert(lhs.get(), oldNode);
term = newNode;
- } else if (i < rhs.length) {
+ } else if (i < rhs.size()) {
if (oldNode instanceof TrieLeaf) {
offset = i;
- children.insert(rhs[i], newNode);
+ children.insert(rhs.get(), newNode);
term = (TrieLeaf)oldNode;
} else {
throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
@@ -147,11 +148,11 @@
}
protected boolean insert(TrieLeaf node, int parentLcp) {
- if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+ if (!regionMatches(aLeaf.entry, parentLcp, node.entry, parentLcp, offset - parentLcp)) {
return false;
} else {
// new node matches the lcp of this node.
- if (node.key.length == offset) {
+ if (node.entry.size() == offset) {
// new node is expected to terminate here.
if (term == null) {
term = node;
@@ -160,14 +161,14 @@
}
} else {
// new node is expected to terminate in one of this nodes children.
- TrieNode child = children.lookup(node.key[offset]);
+ TrieNode child = children.lookup(node.entry.get(offset));
if (child == null) {
// this is the first node to be inserted on this branching key.
- children.insert(node.key[offset], node);
+ children.insert(node.entry.get(offset), node);
} else {
// there is an existing child node branching on this branching key.
if (!child.insert(node, offset)) {
- children.insert(node.key[offset], new TrieBranch(child, node));
+ children.insert(node.entry(offset), new TrieBranch(child, node));
}
}
}
@@ -176,26 +177,26 @@
}
}
- protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
- if (!regionMatches(aLeaf.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+ protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
+ if (!regionMatches(aLeaf.entry, parentLcd, key, parentLcd, offset - parentLcd)) {
//FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
valid[0] = false;
return 0;
} else {
// new node matches the lcp of this node.
TrieNode child;
- if (key.length == offset) {
+ if (key.size() == offset) {
// new node is expected to terminate here.
if (term != null) {
valid[0] = true;
- return term.value;
+ return term.entry.getValue();
} else {
valid[0] = false;
return 0;
}
} else {
// new node is expected to terminate in one of this nodes children.
- child = children.lookup(key[offset]);
+ child = children.lookup(key.get(offset));
if (child != null) {
return child.lookup(key, offset, valid);
} else {
@@ -212,44 +213,33 @@
}
protected static class TrieLeaf extends TrieNode {
- final byte[] key;
- final long value;
+ final DataEntry entry;
- protected TrieLeaf(byte[] key, long value, boolean copyKey) {
- if (copyKey) {
- this.key = new byte[key.length];
- System.arraycopy(key, 0, this.key, 0, key.length);
- } else {
- this.key = key;
- }
- this.value = value;
+ protected TrieLeaf(DataEntry entry) {
+ this.entry = entry;
this.aLeaf = this;
}
- public TrieLeaf(byte[] key, long value) {
- this(key, value, true);
- }
-
protected boolean insert(TrieLeaf node, int parentLcp) {
- if (key.length != node.key.length) {
+ if (entry.size() != node.entry.size()) {
return false;
- } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+ } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, key.size() - parentLcp)) {
return false;
- } else if (value == node.value) {
+ } else if (entry.getValue() == node.entry.getValue()) {
return true; // Duplicate key/value pair.
} else {
throw new IllegalArgumentException("Attempt to insert multiple values for same key");
}
}
- protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
- assert Arrays.equals(this.key, key);
+ protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
+ assert regionMatches(this.entry, 0, key, 0, key.size());
valid[0] = true;
return value;
}
public String toString() {
- return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
+ return "Trie-LEAF: " + entry + " -> " + value;
}
}
}
More information about the Mulgara-svn
mailing list