[Mulgara-svn] r779 - projects/xa2/object-pool/src

andrae at mulgara.org andrae at mulgara.org
Mon Apr 14 01:21:59 UTC 2008


Author: andrae
Date: 2008-04-13 18:21:58 -0700 (Sun, 13 Apr 2008)
New Revision: 779

Added:
   projects/xa2/object-pool/src/ByteMap.java
Log:
Implements a map from byte->T.
Does so using a compressed 256-ary array as described in the citation below.
Note this is an in-memory version - serialising to disk however is trivial and amounts to storing the high
short, the array of low-shorts, and then serialising the data-references as an array of fixed-width offsets.

Tanhermhong, T., et al., "A Structure-Shared Trie Compression Method",
Language, Information and Computing : Proc' 15th Pacific Asia Conf, 2001, 129-138



Copied: projects/xa2/object-pool/src/ByteMap.java (from rev 752, projects/xa2/object-pool/scratch/TrieNode.java)
===================================================================
--- projects/xa2/object-pool/src/ByteMap.java	                        (rev 0)
+++ projects/xa2/object-pool/src/ByteMap.java	2008-04-14 01:21:58 UTC (rev 779)
@@ -0,0 +1,125 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 8th April 2008
+ */
+
+/**
+ * Represents a map from a byte to data.
+ * ie. a compressed 256 element array.
+ */
+public static class ByteMap<T> {
+  short high;
+  short[] low;
+  ArrayList<T> data;
+
+  public TrieNode() {
+    data = new ArrayList<T>();
+  }
+
+  public void insert(byte key, T datum) {
+    short h = hiNibbleAsBitInShort(key);
+    short l;
+    if (high & h != 0) {
+      // We already have a low entry for this hi-nibble.
+      int lowOffset = ones(HIGH, high, key);
+      l = lowNibbleAsBitInShort(key);
+      if (low[lowOffset] & l != 0) {
+        // We already have a data entry for this byte.
+      } else {
+        // We need to insert a low entry for this byte.
+      }
+    } else {
+      // We need to insert a new low entry for this hi-nibble.
+      short[] newlow = new short[low.length + 1];
+      int lowOffset = ones(HIGH, high, key);
+      System.arraycopy(low, 0, newLow, 0, lowOffset);
+      System.arraycopy(low, lowOffset, newLow, lowOffset + 1, low.length - lowOffset);
+    }
+  }
+
+  /**
+   * Returns the forwarding pointer corresponding to the key in this trie node.
+   * Note: The result is a *signed* short.  +ve pointers correspond to references to Trie-Nodes; -ve
+   * pointers correspond to references to child pointers.
+   */
+  public T lookup(byte key) {
+    // the low-nibble index is the ones left of the key's high-nibble bit in high.
+    int lowOffset = ones(HIGH, high, key);
+
+    // the ptrs index is the sum of the ones in the low-nibble array prior to low-nibble index + the ones
+    // left of the key's low-nibble bit in low[lowOffset - 1].
+    int ptrOffset = 0;
+    for (int i = 0; i < lowOffset - 1; i++) {
+      ptrOffset += ones(low[i]);
+    }
+    ptrOffset += ones(LOW, low[lowOffset - 1], key);
+
+    // the resulting index is a direct index into the forwarding-pointer array.
+    return data.get(ptrOffset);
+  }
+
+  private int ones(short index) {
+    shortOnesTable[index & 0x0000FFFF];
+  }
+
+  enum Nibble {
+    HIGH = 0;
+    LOW = 1;
+  };
+
+  private int ones(Nibble nibble, short index, byte key) {
+    // Conversions from signed to unsigned.
+    int idx = index & 0x0000FFFF;
+    int ky = key & 0x000000FF;
+
+    return onesTable[idx * 512 + 2 * ky + nibble];
+  }
+  
+  public short hiNibbleAsBitInShort(byte b) {
+    int nibbleBit = 0x00008000 >> (b & 0x000000F0 >> 4);
+  }
+
+  public short lowNibbleAsBitInShort(byte b) {
+    int nibbleBit = 0x00008000 >> (b & 0x0000000F);
+  }
+
+  // The number of ones in a 16-bit binary number indexed by 16-bit int.
+  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
+  // of 16-bit, we could get away with as little as 127-bytes here.
+  static final byte[] shortOnesTable;
+
+  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
+  // represented as a single bit in a 16-bit unsigned int.
+  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
+  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
+  static final byte[] onesTable;
+
+  static {
+    shortOnesTable = new byte[64*1024];
+    for (i = 0; i < shortOnesTable.length; i++) {
+      short bit = 0x8000;
+      while (bit != 0) {
+        shortOnesTable[i] += (i & bit == 0) ? 0 : 1;
+        bit >> 1;
+      }
+    }
+
+    onesTable = new byte[64*1024 * 2*256];
+    for (int i = 0; i < 64*1024; i++) {
+      for (int b = 0; b < 0x000000FF; b++) {
+        int nibbleBit = 0x00008000 >> (b & 0x000000F0 >> 4);
+        while (nibbleBit != 0) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + HIGH] = (i & nibbleBit == 0) ? 0 : 1;
+        }
+
+        nibbleBit = 0x00008000 >> (b & 0x0000000F);
+        while (nibbleBit != 0) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + LOW] = (i & nibbleBit == 0) ? 0 : 1;
+        }
+      }
+    }
+  }
+}




More information about the Mulgara-svn mailing list