[Mulgara-svn] r799 - in projects/xa2/object-pool/src: . gen

andrae at mulgara.org andrae at mulgara.org
Fri Apr 18 04:43:27 UTC 2008


Author: andrae
Date: 2008-04-17 21:43:27 -0700 (Thu, 17 Apr 2008)
New Revision: 799

Added:
   projects/xa2/object-pool/src/OnesTable.dat
   projects/xa2/object-pool/src/gen/
   projects/xa2/object-pool/src/gen/OnesTable.java
   projects/xa2/object-pool/src/gen/OnesTableGenerator.java
Modified:
   projects/xa2/object-pool/src/ByteMap.java
   projects/xa2/object-pool/src/CompMemTrie.java
   projects/xa2/object-pool/src/CompMemTrieTest.java
Log:
Eliminates the array building at class initialisation of ByteMap.  Moved to an external generator program and
the arrays exported to an external binary file.



Modified: projects/xa2/object-pool/src/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/ByteMap.java	2008-04-18 03:54:49 UTC (rev 798)
+++ projects/xa2/object-pool/src/ByteMap.java	2008-04-18 04:43:27 UTC (rev 799)
@@ -4,8 +4,11 @@
  * Date 8th April 2008
  */
 
+import static gen.OnesTable.*;
+
 import java.util.ArrayList;
 
+
 /**
  * Represents a map from a byte to data.
  * ie. a compressed 256 element array.
@@ -18,7 +21,7 @@
   public ByteMap() {
     high = 0;
     low = new short[0];
-    data = new ArrayList<T>();
+    data = new ArrayList<T>(0);
   }
 
   /**
@@ -102,65 +105,11 @@
     }
   }
 
-  private static int ones(short index) {
-    return shortOnesTable[index & 0x0000FFFF];
+  public int size() {
+    return data.size();
   }
 
-  static private final int HIGH = 0;
-  static private final int LOW = 1;
-
-  private static int ones(int nibble, short index, byte key) {
-    // Conversions from signed to unsigned.
-    int idx = index & 0x0000FFFF;
-    int ky = key & 0x000000FF;
-
-    return onesTable[idx * 512 + 2 * ky + nibble];
+  public int memSize() {
+    return 2 + 2 * low.length + 2 * data.size();
   }
-  
-  public static int hiNibbleAsBitInShort(byte b) {
-    return 0x00008000 >> ((b & 0x000000F0) >> 4);
-  }
-
-  public static int lowNibbleAsBitInShort(byte b) {
-    return 0x00008000 >> (b & 0x0000000F);
-  }
-
-  // The number of ones in a 16-bit binary number indexed by 16-bit int.
-  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
-  // of 16-bit, we could get away with as little as 127-bytes here.
-  static final byte[] shortOnesTable;
-
-  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
-  // represented as a single bit in a 16-bit unsigned int.
-  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
-  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
-  static final byte[] onesTable;
-
-  static {
-    shortOnesTable = new byte[64*1024];
-    for (int i = 0; i < shortOnesTable.length; i++) {
-      int bit = 0x00008000;
-      while (bit != 0) {
-        shortOnesTable[i] += ((i & bit) == 0) ? 0 : 1;
-        bit = bit >> 1;
-      }
-    }
-
-    onesTable = new byte[64*1024 * 2*256];
-    for (int i = 0; i < 0x00010000; i++) {
-      for (int b = 0; b < 0x00000100; b++) {
-        int nibbleBit = hiNibbleAsBitInShort((byte)b);
-        while (nibbleBit < 0x00010000) {
-          nibbleBit = nibbleBit << 1;
-          onesTable[i*512 + b*2 + HIGH] += ((i & nibbleBit) == 0) ? 0 : 1;
-        }
-
-        nibbleBit = lowNibbleAsBitInShort((byte)b);
-        while (nibbleBit != 0) {
-          nibbleBit = nibbleBit << 1;
-          onesTable[i*512 + b*2 + LOW] += ((i & nibbleBit) == 0) ? 0 : 1;
-        }
-      }
-    }
-  }
 }

Modified: projects/xa2/object-pool/src/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrie.java	2008-04-18 03:54:49 UTC (rev 798)
+++ projects/xa2/object-pool/src/CompMemTrie.java	2008-04-18 04:43:27 UTC (rev 799)
@@ -44,7 +44,7 @@
     protected static class InsertAbove extends Exception {} ;
     protected static final InsertAbove cont = new InsertAbove();
 
-    protected TrieLeaf least;
+    protected TrieLeaf aLeaf;
 
     public void insert(byte[] key, long value) throws InsertAbove {
       insert(new TrieLeaf(key, value), 0);
@@ -82,13 +82,17 @@
       this.children = new ByteMap<TrieNode>();
       this.term = new TrieLeaf(key, value);
       this.offset = term.key.length;
-      this.least = this.term;
+      this.aLeaf = this.term;
     }
 
     public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
       this(oldRoot, new TrieLeaf(key, value));
     }
 
+    public int memSize() {
+      return 4 + children.memSize() + (term == null ? 0 : 2);
+    }
+
     private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
       super();
       assert oldNode != null;
@@ -97,9 +101,12 @@
       offset = 0;
       children = new ByteMap<TrieNode>();
       term = null;
-      least = null;
 
-      byte[] lhs = oldNode.least.key;
+      // as every child node shares a common lcp, any child will suffice when preparing the new
+      // lcp/offset of the new node.
+      aLeaf = newNode;
+
+      byte[] lhs = oldNode.aLeaf.key;
       byte[] rhs = newNode.key;
 
       int i = 0;
@@ -109,8 +116,6 @@
           children.insert(lhs[i], oldNode);
           children.insert(rhs[i], newNode);
 
-          least = lhs[i] < rhs[i] ? oldNode.least : newNode;
-
           return; // Escape here.
         }
         i++;
@@ -120,13 +125,11 @@
         offset = i;
         children.insert(lhs[i], oldNode);
         term = newNode;
-        least = newNode;
       } else if (i < rhs.length) {
         if (oldNode instanceof TrieLeaf) {
           offset = i;
           children.insert(rhs[i], newNode);
           term = (TrieLeaf)oldNode;
-          least = (TrieLeaf)oldNode;
         } else {
           throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
         }
@@ -136,7 +139,7 @@
     }
 
     protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
-      if (!regionMatches(least.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+      if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
         throw cont;
       } else {
         // new node matches the lcp of this node.
@@ -144,7 +147,6 @@
           // new node is expected to terminate here.
           if (term == null) {
             term = node;
-            least = node;
           } else {
             term.insert(node, offset);
           }
@@ -159,9 +161,6 @@
               // there is an existing child node branching on this branching key.
               child.insert(node, offset);
             } catch (InsertAbove k) {
-                // as every child node shares a common lcp, any child will suffice when preparing the new
-                // lcp/offset of the new node.  The least node is only required as the new parent's least node
-                // will be the smallest of the inserted node and this node's least node.
                 children.insert(node.key[offset], new TrieBranch(child, node));
             }
           }
@@ -170,7 +169,7 @@
     }
 
     protected long lookup(byte[] key, int parentLcd) throws NotFound {
-      if (!regionMatches(least.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+      if (!regionMatches(aLeaf.key, parentLcd, key, parentLcd, offset - parentLcd)) {
         throw new NotFound();
       } else {
         // new node matches the lcp of this node.
@@ -195,7 +194,7 @@
     }
     
     public String toString() {
-      return "Trie-BRANCH[" + (term != null) + " on " + offset + " and least[" + least + "]";
+      return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and aLeaf[" + aLeaf + "]";
     }
   }
 
@@ -208,7 +207,7 @@
       this.key = new byte[key.length];
       System.arraycopy(key, 0, this.key, 0, key.length);
       this.value = value;
-      this.least = this;
+      this.aLeaf = this;
     }
 
     protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {

Modified: projects/xa2/object-pool/src/CompMemTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrieTest.java	2008-04-18 03:54:49 UTC (rev 798)
+++ projects/xa2/object-pool/src/CompMemTrieTest.java	2008-04-18 04:43:27 UTC (rev 799)
@@ -152,9 +152,11 @@
     }
     long _endInsert = System.currentTimeMillis();
     System.out.println("Input " + namesMap.size() + " entries");
-    System.out.println("  " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
-    System.out.println("  " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
-    System.out.println(chan.position() + " bytes read from /dev/urandom");
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+    System.out.println(chan.position() + " bytes read from " + file);
     chan.close();
 
     long _startLookup = System.currentTimeMillis();
@@ -241,9 +243,11 @@
     }
     long _endInsert = System.currentTimeMillis();
     System.out.println("Input " + namesMap.size() + " entries");
-    System.out.println("  " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
-    System.out.println("  " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
-    System.out.println(chan.position() + " bytes read from /dev/urandom");
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+    System.out.println(chan.position() + " bytes read from " + file);
     chan.close();
 
     long _startLookup = System.currentTimeMillis();
@@ -378,10 +382,14 @@
     }
     long _endInsert = System.currentTimeMillis();
     System.out.println("Input " + namesMap.size() + " entries");
-    System.out.println("  " + nLL + " very long entries ave: " + (_avlLL / nLL) + " in: " + _aggregateLL + "ms");
-    System.out.println("  " + nL + " long entries ave: " + (_avlL / nL) + " in: " + _aggregateL + "ms");
-    System.out.println("  " + nS + " short entries ave: " + (_avlS / nS) + " in: " + _aggregateS + "ms");
-    System.out.println("  " + nSS + " very short entries ave: " + (_avlSS / nSS) + " in: " + _aggregateSS + "ms");
+    System.out.printf("  %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+    System.out.printf("  %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
     chan.close();
 
     long _startLookup = System.currentTimeMillis();

Added: projects/xa2/object-pool/src/OnesTable.dat
===================================================================
(Binary files differ)


Property changes on: projects/xa2/object-pool/src/OnesTable.dat
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Added: projects/xa2/object-pool/src/gen/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTable.java	                        (rev 0)
+++ projects/xa2/object-pool/src/gen/OnesTable.java	2008-04-18 04:43:27 UTC (rev 799)
@@ -0,0 +1,59 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 18th April 2008
+ */
+package gen;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+public class OnesTable {
+  public static final int HIGH = 0;
+  public static final int LOW = 1;
+
+  public static int hiNibbleAsBitInShort(byte b) {
+    return 0x00008000 >> ((b & 0x000000F0) >> 4);
+  }
+
+  public static int lowNibbleAsBitInShort(byte b) {
+    return 0x00008000 >> (b & 0x0000000F);
+  }
+
+  public static int ones(short index) {
+    return shortOnesTable[index & 0x0000FFFF];
+  }
+
+  public static int ones(int nibble, short index, byte key) {
+    // Conversions from signed to unsigned.
+    int idx = index & 0x0000FFFF;
+    int ky = key & 0x000000FF;
+
+    return onesTable[idx * 512 + 2 * ky + nibble];
+  }
+  
+  // The number of ones in a 16-bit binary number indexed by 16-bit int.
+  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
+  // of 16-bit, we could get away with as little as 127-bytes here.
+  private static final byte[] shortOnesTable;
+
+  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
+  // represented as a single bit in a 16-bit unsigned int.
+  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
+  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
+  private static final byte[] onesTable;
+
+  static {
+    try {
+      FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
+      shortOnesTable = new byte[64*1024];
+      fc.read(ByteBuffer.wrap(shortOnesTable));
+      onesTable = new byte[64*1024 * 2*256];
+      fc.read(ByteBuffer.wrap(onesTable));
+    } catch (Exception e) {
+      throw new IllegalStateException("Failed to initialize OnesTable", e);
+    }
+  }
+}

Added: projects/xa2/object-pool/src/gen/OnesTableGenerator.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTableGenerator.java	                        (rev 0)
+++ projects/xa2/object-pool/src/gen/OnesTableGenerator.java	2008-04-18 04:43:27 UTC (rev 799)
@@ -0,0 +1,73 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 17th April 2008
+ */
+package gen;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+public class OnesTableGenerator {
+  public static final int HIGH = 0;
+  public static final int LOW = 1;
+
+  public static int hiNibbleAsBitInShort(byte b) {
+    return 0x00008000 >> ((b & 0x000000F0) >> 4);
+  }
+
+  public static int lowNibbleAsBitInShort(byte b) {
+    return 0x00008000 >> (b & 0x0000000F);
+  }
+
+  // The number of ones in a 16-bit binary number indexed by 16-bit int.
+  // Note: This is a 64KB array.  By using nibbles instead of bytes, and indexing by 8-bit unsigned instead
+  // of 16-bit, we could get away with as little as 127-bytes here.
+  public static final byte[] shortOnesTable;
+
+  // The number of ones in a 16-bit binary number relative to the high/low nibble of an unsigned byte
+  // represented as a single bit in a 16-bit unsigned int.
+  // Note: This is a 32MB array - this can be easily reduced to as little as 16K by moving bit manipulation
+  // operations from the initialisation below to the lookup functions above.  Classic time/space tradeoff.
+  public static final byte[] onesTable;
+
+  static {
+    shortOnesTable = new byte[64*1024];
+    for (int i = 0; i < shortOnesTable.length; i++) {
+      int bit = 0x00008000;
+      while (bit != 0) {
+        shortOnesTable[i] += ((i & bit) == 0) ? 0 : 1;
+        bit = bit >> 1;
+      }
+    }
+
+    onesTable = new byte[64*1024 * 2*256];
+    for (int i = 0; i < 0x00010000; i++) {
+      for (int b = 0; b < 0x00000100; b++) {
+        int nibbleBit = hiNibbleAsBitInShort((byte)b);
+        while (nibbleBit < 0x00010000) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + HIGH] += ((i & nibbleBit) == 0) ? 0 : 1;
+        }
+
+        nibbleBit = lowNibbleAsBitInShort((byte)b);
+        while (nibbleBit != 0) {
+          nibbleBit = nibbleBit << 1;
+          onesTable[i*512 + b*2 + LOW] += ((i & nibbleBit) == 0) ? 0 : 1;
+        }
+      }
+    }
+  }
+
+  public static void main(String[] arg) throws Exception {
+    File output = new File(arg[0]);
+    FileChannel fc = new FileOutputStream(output).getChannel();
+    
+    fc.write(ByteBuffer.wrap(shortOnesTable));
+    fc.write(ByteBuffer.wrap(onesTable));
+
+    fc.close();
+  }
+}




More information about the Mulgara-svn mailing list