[Mulgara-svn] r746 - in projects/xa2/object-pool: . scratch
andrae at mulgara.org
andrae at mulgara.org
Tue Apr 8 09:49:06 UTC 2008
Author: andrae
Date: 2008-04-08 02:49:03 -0700 (Tue, 08 Apr 2008)
New Revision: 746
Added:
projects/xa2/object-pool/scratch/
projects/xa2/object-pool/scratch/L0Block.java
projects/xa2/object-pool/scratch/SITreeBlock.java
projects/xa2/object-pool/scratch/TrieNode.java
Log:
Initial sketching of disk format unmarshalling code for SI-Tree index.
Added: projects/xa2/object-pool/scratch/L0Block.java
===================================================================
--- projects/xa2/object-pool/scratch/L0Block.java (rev 0)
+++ projects/xa2/object-pool/scratch/L0Block.java 2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,83 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a Level-0 Block in an SI-Tree.
+ */
+public class L0Block implements SITreeBlock {
+ ByteBuffer block;
+
+ int type;
+ int count;
+ int startOfTrieNodes;
+ ByteBuffer childPtrs;
+ ShortBuffer trieNodes;
+
+ TrieNode root;
+
+ public L0Block(ByteBuffer buffer) {
+ ByteBuffer block = buffer.slice();
+ block.limit(BLOCK_SIZE);
+ this.block = block.asReadOnlyBuffer();
+
+ demarshall(this.block);
+ }
+
+ private void demarshall(ByteBuffer buffer) {
+ type = buffer.getShort() & USHORT_TO_INT_MASK;
+ if (type != L0BLOCK_TYPE) {
+ throw new DemarshallException(String.format(
+ "Attempt to demarsall block-type: %d as SITree-L0Block(type=%d)", type, L0BLOCK_TYPE));
+ }
+
+ count = buffer.getShort() & USHORT_TO_INT_MASK;
+ if (count < 0) {
+ throw new DemarshallException(String.format("SITree:L0Block contains overflowed count: %d", count));
+ }
+
+ startOfTrieNodes = buffer.getShort() & USHORT_TO_INT_MASK;
+
+ // How do I check count and size here to avoid over/underflow?
+ childPtrs = buffer.slice();
+ childPtrs.limit(count * 2);
+
+ buffer.position(startOfTrieNodes * 2);
+ trieNodes = buffer.asShortBuffer().slice();
+
+ root = new TrieNode(trieNodes, 0);
+ }
+
+ public long getCount() {
+ return count;
+ }
+
+ public TrieNode getRoot() {
+ return root;
+ }
+
+ public ChildPointer getChildPointer(short n) {
+ if (n >= count) {
+ throw new DemarsallException(String.format(
+ "Attempt to obtain SItree-L0Block FwdPtr out of range: %d; count: %d", n, count));
+ }
+ childPtrs.position(n*2);
+ return new ChildPointer(buffer);
+ }
+
+ public static class ChildPointer {
+ public final int fileId;
+ public final long offset; // Note: This value is stored signed - this is java: the max file size is 2**63.
+ public final long length;
+
+ static final int CHILD_POINTER_SIZE = 14;
+
+ ChildPointer(ByteBuffer buffer) {
+ fileId = buffer.getInt() & UINT_TO_LONG_MASK;
+ offset = buffer.getLong(); // signed.
+ length = buffer.getInt() & UINT_TO_LONG_MASK;
+ }
+ }
+}
Added: projects/xa2/object-pool/scratch/SITreeBlock.java
===================================================================
--- projects/xa2/object-pool/scratch/SITreeBlock.java (rev 0)
+++ projects/xa2/object-pool/scratch/SITreeBlock.java 2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,35 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a Level-0 Block in an SI-Tree.
+ * Note: I *must* go through this code and check for sign-extension bugs.
+ */
+
+// NOTE: Java uses sign-extension for integral primitive widening conversions, and MSB-truncation for
+// narrowing conversions. Also note that all values on disk are unsigned even though they must needs be
+// read/written as signed. The appropriate conversion expressions are:
+//
+// Bigger unsignedSmaller = ((int)signedSmaller) & 0x00...FF...; For smaller 00's and FF's to mask sign-extension
+// Smaller signedSmaller = (Smaller)unsignedBigger; -- note MSB truncation will 'do the right thing' here.
+
+public abstract class SITreeBlock {
+
+ public static int BLOCK_SIZE = 32 * 1024;
+
+ public static short L0BLOCK_TYPE = 1;
+ public static short LNBLOCK_TYPE = 2;
+
+ public static final short UBYTE_TO_SHORT_MASK = 0x00FF;
+ public static final int USHORT_TO_INT_MASK = 0x0000FFFF;
+ public static final long UINT_TO_LONG_MASK = 0x00000000FFFFFFFF;
+
+ public abstract long getCount();
+
+ public abstract TrieNode getRoot();
+
+ public abstract getChildPointer(short n);
+}
Added: projects/xa2/object-pool/scratch/TrieNode.java
===================================================================
--- projects/xa2/object-pool/scratch/TrieNode.java (rev 0)
+++ projects/xa2/object-pool/scratch/TrieNode.java 2008-04-08 09:49:03 UTC (rev 746)
@@ -0,0 +1,112 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a TrieNode in an SI-Tree.
+ */
+public static class TrieNode {
+ short high;
+ short[] low;
+ short[] ptrs;
+
+ public TrieNode(ShortBuffer buffer, short offset) {
+ // offset is in shorts, position is in bytes.
+ buffer.position(offset * 2);
+
+ // get high-nibble bitmap.
+ high = buffer.get();
+
+ // the ones in high define the length of the low-nibble bitmap array.
+ low = new short[ones(high)];
+ buffer.get(low);
+
+ // the ones in the low-nibble bitmap array define the length of the forwarding pointers.
+ short lows = 0;
+ for (int i = 0; i < low.length; i++) {
+ lows += ones(low[i]);
+ }
+
+ // get the forwarding pointers.
+ ptrs = new short[ones(lows)];
+ buffer.get(ptrs);
+ }
+
+ /**
+ * Returns the forwarding pointer corresponding to the key in this trie node.
+ * Note: The result is a *signed* short. +ve pointers correspond to references to Trie-Nodes; -ve
+ * pointers correspond to references to child pointers.
+ */
+ public short lookup(byte key) {
+ // the low-nibble index is the ones left of the key's high-nibble bit in high.
+ int lowOffset = ones(HIGH, high, key);
+
+ // the ptrs index is the sum of the ones in the low-nibble array prior to low-nibble index + the ones
+ // left of the key's low-nibble bit in low[lowOffset - 1].
+ int ptrOffset = 0;
+ for (int i = 0; i < lowOffset - 1; i++) {
+ ptrOffset += ones(low[i]);
+ }
+ ptrOffset += ones(LOW, low[lowOffset - 1], key);
+
+ // the resulting index is a direct index into the forwarding-pointer array.
+ return ptrs[ptrOffset];
+ }
+
+ private int ones(short index) {
+ shortOnesTable[index & 0x0000FFFF];
+ }
+
+ enum Nibble {
+ HIGH = 0;
+ LOW = 1;
+ };
+ private int ones(Nibble nibble, short index, byte key) {
+ // Conversions from signed to unsigned.
+ int idx = index & 0x0000FFFF;
+ int ky = key & 0x000000FF;
+
+ return onesTable[idx * 512 + 2 * ky + nibble];
+ }
+
+ // The number of ones in a 16-bit binary number indexed by 16-bit int.
+ // Note: This is a 64KB array. By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
+ // of 16-bit, we could get away with as little as 127-bytes here.
+ static final byte[] shortOnesTable;
+
+ // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
+ // represented as a single bit in a 16-bit unsigned int.
+ // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
+ // operations from the initialisation below to the lookup functions above. Classic time/space tradeoff.
+ static final byte[] onesTable;
+
+ static {
+ shortOnesTable = new byte[64*1024];
+ for (i = 0; i < shortOnesTable.length; i++) {
+ short bit = 0x8000;
+ while (bit != 0) {
+ shortOnesTable[i] += (i & bit == 0) ? 0 : 1;
+ bit >> 1;
+ }
+ }
+
+ onesTable = new byte[64*1024 * 2*256];
+ for (int i = 0; i < 64*1024; i++) {
+ for (int b = 0; b < 0x000000FF; b++) {
+ int nibbleBit = 0x00008000 >> (b & 0x000000F0 >> 4);
+ while (nibbleBit != 0) {
+ nibbleBit = nibbleBit << 1;
+ onesTable[i*512 + b*2 + HIGH] = (i & nibbleBit == 0) ? 0 : 1;
+ }
+
+ nibbleBit = 0x00008000 >> (b & 0x0000000F);
+ while (nibbleBit != 0) {
+ nibbleBit = nibbleBit << 1;
+ onesTable[i*512 + b*2 + LOW] = (i & nibbleBit == 0) ? 0 : 1;
+ }
+ }
+ }
+ }
+}
More information about the Mulgara-svn
mailing list