[Mulgara-svn] r746 - in projects/xa2/object-pool: . scratch

andrae at mulgara.org andrae at mulgara.org
Tue Apr 8 09:49:06 UTC 2008


Author: andrae
Date: 2008-04-08 02:49:03 -0700 (Tue, 08 Apr 2008)
New Revision: 746

Added:
   projects/xa2/object-pool/scratch/
   projects/xa2/object-pool/scratch/L0Block.java
   projects/xa2/object-pool/scratch/SITreeBlock.java
   projects/xa2/object-pool/scratch/TrieNode.java
Log:
Initial sketching of disk format unmarshalling code for SI-Tree index.



Added: projects/xa2/object-pool/scratch/L0Block.java
===================================================================
--- projects/xa2/object-pool/scratch/L0Block.java	                        (rev 0)
+++ projects/xa2/object-pool/scratch/L0Block.java	2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,83 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a Level-0 Block in an SI-Tree.
+ */
+public class L0Block implements SITreeBlock {
+  ByteBuffer block;
+
+  int type;
+  int count;
+  int startOfTrieNodes;
+  ByteBuffer childPtrs;
+  ShortBuffer trieNodes;
+
+  TrieNode root;
+
+  public L0Block(ByteBuffer buffer) {
+    ByteBuffer block = buffer.slice();
+    block.limit(BLOCK_SIZE);
+    this.block = block.asReadOnlyBuffer();
+
+    demarshall(this.block);
+  }
+
+  private void demarshall(ByteBuffer buffer) {
+    type = buffer.getShort() & USHORT_TO_INT_MASK;
+    if (type != L0BLOCK_TYPE) {
+      throw new DemarshallException(String.format(
+          "Attempt to demarsall block-type: %d as SITree-L0Block(type=%d)", type, L0BLOCK_TYPE));
+    }
+
+    count = buffer.getShort() & USHORT_TO_INT_MASK;
+    if (count < 0) {
+      throw new DemarshallException(String.format("SITree:L0Block contains overflowed count: %d", count));
+    }
+
+    startOfTrieNodes = buffer.getShort() & USHORT_TO_INT_MASK;
+
+    // How do I check count and size here to avoid over/underflow?
+    childPtrs = buffer.slice();
+    childPtrs.limit(count * 2);
+
+    buffer.position(startOfTrieNodes * 2);
+    trieNodes = buffer.asShortBuffer().slice();
+
+    root = new TrieNode(trieNodes, 0);
+  }
+
+  public long getCount() {
+    return count;
+  }
+
+  public TrieNode getRoot() {
+    return root;
+  }
+
+  public ChildPointer getChildPointer(short n) {
+    if (n >= count) {
+      throw new DemarsallException(String.format(
+          "Attempt to obtain SItree-L0Block FwdPtr out of range: %d; count: %d", n, count));
+    }
+    childPtrs.position(n*2);
+    return new ChildPointer(buffer);
+  }
+
+  public static class ChildPointer {
+    public final int fileId;
+    public final long offset; // Note: This value is stored signed - this is java: the max file size is 2**63.
+    public final long length;
+
+    static final int CHILD_POINTER_SIZE = 14;
+
+    ChildPointer(ByteBuffer buffer) {
+      fileId = buffer.getInt() & UINT_TO_LONG_MASK;
+      offset = buffer.getLong();  // signed.
+      length = buffer.getInt() & UINT_TO_LONG_MASK;
+    }
+  }
+}

Added: projects/xa2/object-pool/scratch/SITreeBlock.java
===================================================================
--- projects/xa2/object-pool/scratch/SITreeBlock.java	                        (rev 0)
+++ projects/xa2/object-pool/scratch/SITreeBlock.java	2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,35 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a Level-0 Block in an SI-Tree.
+ * Note: I *must* go through this code and check for sign-extension bugs.
+ */
+
+// NOTE: Java uses sign-extension for integral primitive widening conversions, and MSB-truncation for
+// narrowing conversions.  Also note that all values on disk are unsigned even though they must needs be
+// read/written as signed.  The appropriate conversion expressions are:
+//
+// Bigger unsignedSmaller = ((int)signedSmaller) & 0x00...FF...; For smaller 00's and FF's to mask sign-extension
+// Smaller signedSmaller = (Smaller)unsignedBigger;  -- note MSB truncation will 'do the right thing' here.
+
+public abstract class SITreeBlock {
+
+  public static int BLOCK_SIZE = 32 * 1024;
+
+  public static short L0BLOCK_TYPE = 1;
+  public static short LNBLOCK_TYPE = 2;
+
+  public static final short UBYTE_TO_SHORT_MASK = 0x00FF;
+  public static final int USHORT_TO_INT_MASK = 0x0000FFFF;
+  public static final long UINT_TO_LONG_MASK = 0x00000000FFFFFFFF;
+
+  public abstract long getCount();
+
+  public abstract TrieNode getRoot();
+
+  public abstract getChildPointer(short n);
+}

Added: projects/xa2/object-pool/scratch/TrieNode.java
===================================================================
--- projects/xa2/object-pool/scratch/TrieNode.java	                        (rev 0)
+++ projects/xa2/object-pool/scratch/TrieNode.java	2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,112 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a TrieNode in an SI-Tree.
+ */
+public static class TrieNode {
+  short high;
+  short[] low;
+  short[] ptrs;
+
+  public TrieNode(ShortBuffer buffer, short offset) {
+    // offset is in shorts, position is in bytes.
+    buffer.position(offset * 2);
+
+    // get high-nibble bitmap.
+    high = buffer.get();
+
+    // the ones in high define the length of the low-nibble bitmap array.
+    low = new short[ones(high)];
+    buffer.get(low);
+
+    // the ones in the low-nibble bitmap array define the length of the forwarding pointers.
+    short lows = 0;
+    for (int i = 0; i < low.length; i++) {
+      lows += ones(low[i]);
+    }
+
+    // get the forwarding pointers.
+    ptrs = new short[ones(lows)];
+    buffer.get(ptrs);
+  }
+
+  /**
+   * Returns the forwarding pointer corresponding to the key in this trie node.
+   * Note: The result is a *signed* short.  +ve pointers correspond to references to Trie-Nodes; -ve
+   * pointers correspond to references to child pointers.
+   */
+  public short lookup(byte key) {
+    // the low-nibble index is the ones left of the key's high-nibble bit in high.
+    int lowOffset = ones(HIGH, high, key);
+
+    // the ptrs index is the sum of the ones in the low-nibble array prior to low-nibble index + the ones
+    // left of the key's low-nibble bit in low[lowOffset - 1].
+    int ptrOffset = 0;
+    for (int i = 0; i < lowOffset - 1; i++) {
+      ptrOffset += ones(low[i]);
+    }
+    ptrOffset += ones(LOW, low[lowOffset - 1], key);
+
+    // the resulting index is a direct index into the forwarding-pointer array.
+    return ptrs[ptrOffset];
+  }
+
+  private int ones(short index) {
+    shortOnesTable[index & 0x0000FFFF];
+  }
+
+  enum Nibble {
+    HIGH = 0;
+    LOW = 1;
+  };
+  private int ones(Nibble nibble, short index, byte key) {
+    // Conversions from signed to unsigned.
+    int idx = index & 0x0000FFFF;
+    int ky = key & 0x000000FF;
+
+    return onesTable[idx * 512 + 2 * ky + nibble];
+  }
+  
+  // The number of ones in a 16-bit binary number indexed by 16-bit int.
+  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
+  // of 16-bit, we could get away with as little as 127-bytes here.
+  static final byte[] shortOnesTable;
+
+  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
+  // represented as a single bit in a 16-bit unsigned int.
+  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
+  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
+  static final byte[] onesTable;
+
+  static {
+    shortOnesTable = new byte[64*1024];
+    for (i = 0; i < shortOnesTable.length; i++) {
+      short bit = 0x8000;
+      while (bit != 0) {
+        shortOnesTable[i] += (i & bit == 0) ? 0 : 1;
+        bit >> 1;
+      }
+    }
+
+    onesTable = new byte[64*1024 * 2*256];
+    for (int i = 0; i < 64*1024; i++) {
+      for (int b = 0; b < 0x000000FF; b++) {
+        int nibbleBit = 0x00008000 >> (b & 0x000000F0 >> 4);
+        while (nibbleBit != 0) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + HIGH] = (i & nibbleBit == 0) ? 0 : 1;
+        }
+
+        nibbleBit = 0x00008000 >> (b & 0x0000000F);
+        while (nibbleBit != 0) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + LOW] = (i & nibbleBit == 0) ? 0 : 1;
+        }
+      }
+    }
+  }
+}




More information about the Mulgara-svn mailing list