[Mulgara-svn] r899 - in projects/xa2/object-pool/src: . data gen trie

andrae at mulgara.org andrae at mulgara.org
Tue May 6 06:39:35 UTC 2008


Author: andrae
Date: 2008-05-05 23:39:34 -0700 (Mon, 05 May 2008)
New Revision: 899

Added:
   projects/xa2/object-pool/src/data/
   projects/xa2/object-pool/src/data/DataEntry.java
   projects/xa2/object-pool/src/data/DataFile.java
Removed:
   projects/xa2/object-pool/src/gen/OnesTable.java
Modified:
   projects/xa2/object-pool/src/trie/DiskTrie.java
   projects/xa2/object-pool/src/trie/MemoryTrie.java
Log:
Now we start introducing caching to the entries within the object pool itself.
Specifically we start handling the possibility that an entry may require multiple blocks and that a block may
contain multiple entries.
This is compounded by the fact that a compressed trie will often only need to access a single byte from a
given entry, so we should only load the head block and the block containing that byte if we can.



Added: projects/xa2/object-pool/src/data/DataEntry.java
===================================================================
--- projects/xa2/object-pool/src/data/DataEntry.java	                        (rev 0)
+++ projects/xa2/object-pool/src/data/DataEntry.java	2008-05-06 06:39:34 UTC (rev 899)
@@ -0,0 +1,76 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 6th May 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Abstracts the concept of a data file entry.
+ */
+public abstract class DataEntry {
+  private long value;
+
+  public abstract int size();
+  public abstract int totalSize() {
+    return 8 + 4 + size(); // VALUE + LENGTH + DATA
+  }
+  public abstract void write(FileChannel chan) throws IOException;
+
+  protected class DiskBackedDataEntry extends DataEntry {
+    private int length;
+    private ByteBuffer entry;
+    private ByteBuffer data;
+
+    // FIXME: Need to replace this with a background populated ConcurrentLinkedQueue.
+    private Block[] blocks;
+
+    DiskBackedDataEntry(FileHandle handle, long position) {
+      Block block = handle.readBlock(position);
+      this.entry = block.offset(position & (BLOCK_SIZE - 1));  // block size is a power of 2 therefore mask is size - 1.
+
+      this.value = entry.getLong();
+      this.length = entry.getInt();
+
+      if (length <= entry.remaining()) {
+        entry.limit(length);
+        blocks[] = new Block[] { block };
+      } else {
+        entry.limit(entry.capacity());
+        int remaining = length - entry.remaining();
+        int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
+        blocks[] = new Block[remainingBlocks];
+        blocks[0] = block;
+      }
+    }
+  }
+
+  protected class MemoryBackedDataEntry extends DataEntry {
+    private ByteBuffer data;
+
+    MemoryBackedDataEntry(long value, byte[] data) {
+      this(value, data, true);
+    }
+
+    MemoryBackedDataEntry(long value, byte[] data, boolean copyData) {
+      this.value = value;
+      if (copyData) {
+        byte[] copy = new byte[data.length];
+        System.arraycopy(data, 0, copy, 0, data.length);
+        this.data = ByteBuffer.wrap(copy);
+      } else {
+        this.data = ByteBuffer.wrap(data);
+      }
+    }
+
+    public int size() {
+      return data.capacity();
+    }
+  }
+}

Added: projects/xa2/object-pool/src/data/DataFile.java
===================================================================
--- projects/xa2/object-pool/src/data/DataFile.java	                        (rev 0)
+++ projects/xa2/object-pool/src/data/DataFile.java	2008-05-06 06:39:34 UTC (rev 899)
@@ -0,0 +1,28 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 1st May 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Abstracts the concept of a file of sorted octet sequences.
+ */
+public class DataFile {
+  public static final int MAGIC = 0x00030002;
+  public static final int LOG2_BLOCK_SIZE = 21;
+  public static final int BLOCK_SIZE = 0x01 << LOG2_BLOCK_SIZE;
+
+  private int blockSize;
+
+  public DataFile(File file) {
+    super();
+    FileHandle handle = IOScheduler.open(file, MAGIC, LOG2_BLOCK_SIZE);
+  }
+}

Deleted: projects/xa2/object-pool/src/gen/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTable.java	2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/gen/OnesTable.java	2008-05-06 06:39:34 UTC (rev 899)
@@ -1,58 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 18th April 2008
- */
-package gen;
-
-import java.io.FileInputStream;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-public class OnesTable {
-  public static final int HIGH = 0;
-  public static final int LOW = 1;
-
-  public static int hiNibbleAsBitInShort(byte b) {
-    return 0x00008000 >> ((b & 0x000000F0) >> 4);
-  }
-
-  public static int lowNibbleAsBitInShort(byte b) {
-    return 0x00008000 >> (b & 0x0000000F);
-  }
-
-  public static int ones(short index) {
-    return shortOnesTable[index & 0x0000FFFF];
-  }
-
-  public static int ones(int nibble, short index, byte key) {
-    // Conversions from signed to unsigned.
-    int idx = index & 0x0000FFFF;
-    int ky = key & 0x000000FF;
-
-    return onesTable[idx * 512 + 2 * ky + nibble];
-  }
-  
-  // The number of ones in a 16-bit binary number indexed by 16-bit int.
-  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
-  // of 16-bit, we could get away with as little as 127-bytes here.
-  private static final byte[] shortOnesTable;
-
-  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
-  // represented as a single bit in a 16-bit unsigned int.
-  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
-  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
-  private static final byte[] onesTable;
-
-  static {
-    try {
-      FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
-      shortOnesTable = new byte[64*1024];
-      fc.read(ByteBuffer.wrap(shortOnesTable));
-      onesTable = new byte[64*1024 * 2*256];
-      fc.read(ByteBuffer.wrap(onesTable));
-    } catch (Exception e) {
-      throw new IllegalStateException("Failed to initialize OnesTable", e);
-    }
-  }
-}

Modified: projects/xa2/object-pool/src/trie/DiskTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrie.java	2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/trie/DiskTrie.java	2008-05-06 06:39:34 UTC (rev 899)
@@ -112,7 +112,7 @@
     this.space = 0; // If this gets modified it will be recalculated automaticly then.
   }
 
-  public boolean insert(byte[] key, long value) {
+  public boolean insert(DataEntry entry) {
     // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
     // trie-block, but if we haven't we should recalculate the space available. 
     if (space == 0) {
@@ -125,7 +125,7 @@
       }
     }
 
-    super.insert(key, value);
+    super.insert(entry);
     space--;
 
     return true;
@@ -225,12 +225,8 @@
     public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
       TrieLeaf leaf = (TrieLeaf)node;
 
-      ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
       long keyLocation = data.position();
-      dataBuf.putLong(leaf.value);
-      dataBuf.putInt(leaf.key.length);
-      dataBuf.put(leaf.key);
-      data.write((ByteBuffer)dataBuf.flip());
+      leaf.entry.write(data);
 
       locationMap.put(leaf, (short)index.position());
       index.put(TYPE_LEAF);
@@ -262,20 +258,13 @@
     return branch;
   }
 
-  private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
-    index.get();
+  private TrieLeaf readTrieLeaf(ByteBuffer index, DataFile dataFile) throws IOException {
+    index.get();  // skip padding.
     long keyLocation = index.getLong();
-    data.position(keyLocation);
-    // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
-    ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
-    data.read(bb);
-    bb.flip();
-    long value = bb.getLong();
-    int length = bb.getInt();
-    byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
-    data.read(ByteBuffer.wrap(key));
 
-    return new TrieLeaf(key, value, false);
+    DataEntry entry = dataFile.getEntry(keyLocation);
+
+    return new TrieLeaf(entry, false);
   }
 
   private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
@@ -336,7 +325,7 @@
 
     public int dataSize(TrieNode node) {
       TrieLeaf leaf = (TrieLeaf)node;
-      return 8 + 4 + leaf.key.length;
+      return leaf.entry.totalSize();
     }
   }
 }

Modified: projects/xa2/object-pool/src/trie/MemoryTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrie.java	2008-05-06 06:21:36 UTC (rev 898)
+++ projects/xa2/object-pool/src/trie/MemoryTrie.java	2008-05-06 06:39:34 UTC (rev 899)
@@ -19,19 +19,19 @@
     this.root = null;
   }
 
-  public boolean insert(byte[] key, long value) {
+  public boolean insert(DataEntry entry) {
     if (root == null) {
-      root = new TrieLeaf(key, value);
+      root = new TrieLeaf(entry);
     } else {
-      if (!root.insert(key, value)) {
-        root = new TrieBranch(root, key, value);
+      if (!root.insert(entry)) {
+        root = new TrieBranch(root, entry);
       }
     }
 
     return true;
   }
 
-  public long lookup(byte[] key) throws NotFound {
+  public long lookup(DataEntry key) throws NotFound {
     boolean[] valid = new boolean[1];
     long result = lookup(key, valid);
     if (valid[0]) {
@@ -42,7 +42,7 @@
   }
       
 
-  public long lookup(byte[] key, boolean[] valid) {
+  public long lookup(DataEntry key, boolean[] valid) {
     if (root == null) {
       valid[0] = false;
       return 0;
@@ -57,29 +57,28 @@
     /**
      * @return false if we need to insert key above this node.
      */
-    public boolean insert(byte[] key, long value) {
-      return insert(new TrieLeaf(key, value), 0);
+    public boolean insert(DataEntry entry) {
+      return insert(new TrieLeaf(entry), 0);
     }
 
-    public long lookup(byte[] key, boolean[] valid) {
+    public long lookup(DataEntry key, boolean[] valid) {
       return lookup(key, 0, valid);
     }
 
     protected abstract boolean insert(TrieLeaf node, int parentLcd);
-    protected abstract long lookup(byte[] key, int parentLcd, boolean[] valid);
+    protected abstract long lookup(DataEntry key, int parentLcd, boolean[] valid);
 
-    protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+    protected static boolean regionMatches(DataEntry lhs, int lhsOff, DataEntry rhs, int rhsOff, int len) {
       if (lhsOff < 0 || rhsOff < 0) {
         return false;
       }
-      if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+      if (lhs.size() < lhsOff + len || rhs.size() < rhsOff + len) {
         return false;
       }
-      for (int i = 0; i < len; i++) {
-        if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
-      }
 
-      return true;
+      int cmp = lhs.slice(lhsOff, len).compareTo(rhs.slice(rhsOff, len));
+
+      return (cmp == 0);
     }
   }
 
@@ -90,10 +89,10 @@
     
     protected TrieBranch() {}
 
-    public TrieBranch(byte[] key, long value) {
+    public TrieBranch(DataEntry entry) {
       this.children = new ByteMap<TrieNode>();
-      this.term = new TrieLeaf(key, value);
-      this.offset = term.key.length;
+      this.term = new TrieLeaf(entry);
+      this.offset = term.entry.size();
       this.aLeaf = this.term;
     }
 
@@ -114,29 +113,31 @@
       // lcp/offset of the new node.
       aLeaf = newNode;
 
-      byte[] lhs = oldNode.aLeaf.key;
-      byte[] rhs = newNode.key;
+      DataEntry lhs = oldNode.aLeaf.entry.rewind();
+      DataEntry rhs = newNode.entry.rewind();
 
       int i = 0;
-      while (i < lhs.length && i < rhs.length) {
-        if (lhs[i] != rhs[i]) {
+      while (i < lhs.size() && i < rhs.size()) {
+        byte lb = lhs.get();
+        byte rb = rhs.get();
+        if (lb != rb) {
           offset = i;
-          children.insert(lhs[i], oldNode);
-          children.insert(rhs[i], newNode);
+          children.insert(lb, oldNode);
+          children.insert(rb, newNode);
 
           return; // Escape here.
         }
         i++;
       }
 
-      if (i < lhs.length) {
+      if (i < lhs.size()) {
         offset = i;
-        children.insert(lhs[i], oldNode);
+        children.insert(lhs.get(), oldNode);
         term = newNode;
-      } else if (i < rhs.length) {
+      } else if (i < rhs.size()) {
         if (oldNode instanceof TrieLeaf) {
           offset = i;
-          children.insert(rhs[i], newNode);
+          children.insert(rhs.get(), newNode);
           term = (TrieLeaf)oldNode;
         } else {
           throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
@@ -147,11 +148,11 @@
     }
 
     protected boolean insert(TrieLeaf node, int parentLcp) {
-      if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+      if (!regionMatches(aLeaf.entry, parentLcp, node.entry, parentLcp, offset - parentLcp)) {
         return false;
       } else {
         // new node matches the lcp of this node.
-        if (node.key.length == offset) {
+        if (node.entry.size() == offset) {
           // new node is expected to terminate here.
           if (term == null) {
             term = node;
@@ -160,14 +161,14 @@
           }
         } else {
           // new node is expected to terminate in one of this nodes children.
-          TrieNode child = children.lookup(node.key[offset]);
+          TrieNode child = children.lookup(node.entry.get(offset));
           if (child == null) {
             // this is the first node to be inserted on this branching key.
-            children.insert(node.key[offset], node);
+            children.insert(node.entry.get(offset), node);
           } else {
             // there is an existing child node branching on this branching key.
             if (!child.insert(node, offset)) {
-              children.insert(node.key[offset], new TrieBranch(child, node));
+              children.insert(node.entry(offset), new TrieBranch(child, node));
             }
           }
         }
@@ -176,26 +177,26 @@
       }
     }
 
-    protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
-      if (!regionMatches(aLeaf.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+    protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
+      if (!regionMatches(aLeaf.entry, parentLcd, key, parentLcd, offset - parentLcd)) {
         //FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
         valid[0] = false;
         return 0;
       } else {
         // new node matches the lcp of this node.
         TrieNode child;
-        if (key.length == offset) {
+        if (key.size() == offset) {
           // new node is expected to terminate here.
           if (term != null) {
             valid[0] = true;
-            return term.value;
+            return term.entry.getValue();
           } else {
             valid[0] = false;
             return 0;
           }
         } else {
           // new node is expected to terminate in one of this nodes children.
-          child = children.lookup(key[offset]);
+          child = children.lookup(key.get(offset));
           if (child != null) {
             return child.lookup(key, offset, valid);
           } else {
@@ -212,44 +213,33 @@
   }
 
   protected static class TrieLeaf extends TrieNode {
-    final byte[] key;
-    final long value;
+    final DataEntry entry;
 
-    protected TrieLeaf(byte[] key, long value, boolean copyKey) {
-      if (copyKey) {
-        this.key = new byte[key.length];
-        System.arraycopy(key, 0, this.key, 0, key.length);
-      } else {
-        this.key = key;
-      }
-      this.value = value;
+    protected TrieLeaf(DataEntry entry) {
+      this.entry = entry;
       this.aLeaf = this;
     }
 
-    public TrieLeaf(byte[] key, long value) {
-      this(key, value, true);
-    }
-
     protected boolean insert(TrieLeaf node, int parentLcp) {
-      if (key.length != node.key.length) {
+      if (entry.size() != node.entry.size()) {
         return false;
-      } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+      } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, key.size() - parentLcp)) {
         return false;
-      } else if (value == node.value) {
+      } else if (entry.getValue() == node.entry.getValue()) {
         return true; // Duplicate key/value pair.
       } else {
         throw new IllegalArgumentException("Attempt to insert multiple values for same key");
       }
     }
 
-    protected long lookup(byte[] key, int parentLcd, boolean[] valid) {
-      assert Arrays.equals(this.key, key);
+    protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
+      assert regionMatches(this.entry, 0, key, 0, key.size());
       valid[0] = true;
       return value;
     }
     
     public String toString() {
-      return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
+      return "Trie-LEAF: " + entry + " -> " + value;
     }
   }
 }




More information about the Mulgara-svn mailing list