[Mulgara-svn] r824 - projects/xa2/object-pool/src

andrae at mulgara.org andrae at mulgara.org
Wed Apr 23 06:47:19 UTC 2008


Author: andrae
Date: 2008-04-22 23:47:18 -0700 (Tue, 22 Apr 2008)
New Revision: 824

Added:
   projects/xa2/object-pool/src/CompBlockTrie.java
   projects/xa2/object-pool/src/CompBlockTrieTest.java
   projects/xa2/object-pool/src/tmp/
Modified:
   projects/xa2/object-pool/src/ByteMap.java
   projects/xa2/object-pool/src/CompMemTrie.java
Log:
Implemented serializing a compressed-trie node to disk.  This also includes a wrapper CompBlockTrie that
tracks the amount of space required to serialize the trie and prevents it exceeding 1 Block.  In 32K the
resulting branching factor is >1260/block.



Modified: projects/xa2/object-pool/src/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/ByteMap.java	2008-04-23 06:43:53 UTC (rev 823)
+++ projects/xa2/object-pool/src/ByteMap.java	2008-04-23 06:47:18 UTC (rev 824)
@@ -6,7 +6,9 @@
 
 import static gen.OnesTable.*;
 
-import java.util.ArrayList;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
 
 
 /**
@@ -16,18 +18,20 @@
 public class ByteMap<T> implements Iterable<T> {
   short high;
   short[] low;
-  ArrayList<T> data;
+  T[] data;
 
+  @SuppressWarnings("unchecked")
   public ByteMap() {
     high = 0;
     low = new short[0];
-    data = new ArrayList<T>(2);
+    data = (T[])new Object[0];
   }
 
   /**
    * @param datum Value to be associated with key.  Note: Must not be null.
    * @return the old value associated with key, or null if this is a new mapping.
    */
+  @SuppressWarnings("unchecked")
   public T insert(byte key, T datum) {
     if (datum == null) {
       throw new IllegalArgumentException("Attempted to insert null into ByteMap");
@@ -46,11 +50,21 @@
       int l = lowNibbleAsBitInShort(key);
       if ((low[lowOffset] & l) != 0) {
         // We already have a data entry for this byte.
-        return data.set(dataOffset, datum);
+        T t = data[dataOffset];
+        data[dataOffset] = datum;
+
+        return t;
       } else {
         // We need to insert a data entry for this byte.
+        // Extend data array.
+        T[] newData = (T[])new Object[data.length + 1];
+        System.arraycopy(data, 0, newData, 0, dataOffset);
+        System.arraycopy(data, dataOffset, newData, dataOffset + 1, data.length - dataOffset);
+        data = newData;
+        // Set new entry in data array; indicate it in low entry.
         low[lowOffset] = (short)(low[lowOffset] | l);
-        data.add(dataOffset, datum);
+        data[dataOffset] = datum;
+
         return null;
       }
     } else {
@@ -59,9 +73,9 @@
       int lowOffset = ones(HIGH, high, key);
       System.arraycopy(low, 0, newLow, 0, lowOffset);
       System.arraycopy(low, lowOffset, newLow, lowOffset + 1, low.length - lowOffset);
-      newLow[lowOffset] = 0;
       high = (short)(high | h);
       low = newLow;
+
       return insert(key, datum);
     }
   }
@@ -97,24 +111,35 @@
     }
     dataOffset += ones(LOW, low[lowOffset], key);
 
-    if (dataOffset >= data.size()) {
+    if (dataOffset >= data.length) {
       return null;
     } else {
       // the resulting index is a direct index into the forwarding-pointer array.
-      return data.get(dataOffset);
+      return data[dataOffset];
     }
   }
 
   public Iterator<T> iterator() {
-    return children.iterator();
+    return new Iterator<T>() {
+        private int index = 0;
+        public boolean hasNext() { return index < data.length; }
+        public T next() {
+          if (index < data.length) {
+            return data[index++];
+          } else {
+            throw new NoSuchElementException("Data Iterator in ByteMap");
+          }
+        }
+        public void remove() { throw new UnsupportedOperationException("Data Iterator in ByteMap"); }
+    };
   }
 
   public int size() {
-    return data.size();
+    return data.length;
   }
 
   public int memSize() {
-    return 2 + 2 * low.length + 2 * data.size();
+    return 2 + 2 * low.length + 2 * data.length;
   }
 
   public int headerSize() {
@@ -122,9 +147,9 @@
   }
 
   public void writeHeader(ByteBuffer bb) {
-    bb.putByte(high);
+    bb.putShort(high);
     for (int i = 0; i < low.length; i++) {
-      bb.putByte(low[i]);
+      bb.putShort(low[i]);
     }
   }
 }

Copied: projects/xa2/object-pool/src/CompBlockTrie.java (from rev 801, projects/xa2/object-pool/src/CompMemTrie.java)
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrie.java	                        (rev 0)
+++ projects/xa2/object-pool/src/CompBlockTrie.java	2008-04-23 06:47:18 UTC (rev 824)
@@ -0,0 +1,64 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 22nd April 2008
+ */
+
+import java.util.Arrays;
+
+/**
+ * Extends CompMemTrie to guarantee the trie will fit within a single block.
+ */
+public class CompBlockTrie extends CompMemTrie {
+  @SuppressWarnings("serial")
+  public static int MAGIC = 0x00010001;
+  // Calculated as the worst case size per entry.
+  // Assumes a full leaf and branch per entry.
+  // Block: 2-byte type-header
+  //        4-byte offset
+  //        2-byte ByteMap-High
+  //        4-byte ByteMap-Lows (2-entries).
+  //        4-byte Children Ptrs.
+  // Leaf:  2-byte type-header
+  //        8-byte offset.
+  //       26-bytes Total.
+  // Best case of course is 10-bytes leaf + 2-bytes children in branch.
+  private static short WORST_CASE_ENTRY_SIZE = 26;
+  private static short BEST_CASE_ENTRY_SIZE = 12;
+
+  /** Worst-case space left in trie measured in entries */
+  private short space;
+
+  /**
+   * The blocksize for this Block-Trie.
+   * Currently the maximum block size supported is 32K, which is worst-case:
+   *   1260 entries covered by a single I0 index in 32KB.
+   *   1.59 million entries by a single I1 index in 40MB.
+   *   2.00 billion entries by a single I2 index in 49GB.
+   *   2.52 trillion entries by a single I3 index in 60TB.
+   */ 
+  private short blockSize;
+
+  public CompBlockTrie(short blockSize) {
+    assert blockSize > 1024;
+    this.blockSize = blockSize;
+    this.space = (short)((blockSize - 6) / WORST_CASE_ENTRY_SIZE); // -6 leaves room for header info.
+  }
+
+  public void insert(byte[] key, long value) throws CompMemTrie.InsufficientSpace {
+    // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
+    // trie-block, but if we haven't we should recalculate the space available. 
+    if (space == 0) {
+      int size = root.totalIndexSize();
+      space = (short)((blockSize - size - 6) / WORST_CASE_ENTRY_SIZE);
+      if (space < 0) {
+        throw new IllegalStateException("Overfilled CompBlockTrie");
+      } else if (space == 0) {
+        throw new CompMemTrie.InsufficientSpace("Trie Node is filled");
+      }
+    }
+
+    super.insert(key, value);
+    space--;
+  }
+}

Copied: projects/xa2/object-pool/src/CompBlockTrieTest.java (from rev 799, projects/xa2/object-pool/src/CompMemTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrieTest.java	                        (rev 0)
+++ projects/xa2/object-pool/src/CompBlockTrieTest.java	2008-04-23 06:47:18 UTC (rev 824)
@@ -0,0 +1,444 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the CompBlockTrie.
+ */
+public class CompBlockTrieTest {
+  public static void main(String[] args) throws Exception {
+    testWithFile(new File("../scratch/propernames.uniq"));
+//    testWithFile(new File("../scratch/connectives.uniq"));
+//    testWithFile(new File("../scratch/web2a.uniq"));
+//    testWithFile(new File("../scratch/web2.uniq"));
+//    testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+//    testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+//    testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+//    testWithRandomFile(new File("/dev/urandom"));
+//    testLoadWithRandomFile(new File("/dev/urandom"));
+  }
+
+  private static final short BLOCK_SIZE = (short)(32 * 1024);
+  public static void testWithFile(File file) throws Exception {
+    Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+
+    FileChannel indexFile = new RandomAccessFile("tmp/sp_idx", "rw").getChannel().truncate(0);
+    FileChannel dataFile = new RandomAccessFile("tmp/sp_dat", "rw").getChannel().truncate(0);
+    ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+  
+    ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
+    CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
+    tries.add(trie);
+
+    System.out.println("Inserting lines from " + file);
+
+    BufferedReader names = new BufferedReader(new FileReader(file));
+    long n = 0;
+    String name = names.readLine();
+    long _startInsert = System.currentTimeMillis();
+    for (;;) {
+      // The use of the double loop means we pay the cost of setting up the exception handler only once per
+      // block, not once per entry.
+      try {
+        while (name != null) {
+          trie.insert(name.getBytes(), n);
+          // Note: exception thrown here if trie is full.  So name will remain valid when we reenter loop with
+          // a new trie.
+          namesMap.put(name.getBytes(), n);
+          name = names.readLine();
+          n++;
+        }
+        break;  // From for(;;)-loop.
+      } catch (CompMemTrie.InsufficientSpace ec) {
+        trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+        indexFile.write(indexBuffer);
+        trie = new CompBlockTrie(BLOCK_SIZE);
+        tries.add(trie);
+      }
+    }
+
+    indexFile.force(true);
+    dataFile.force(true);
+
+    long _endInsert = System.currentTimeMillis();
+    names.close();
+
+    System.out.println("Checking lines from " + file);
+    long _startLookup = System.currentTimeMillis();
+    for (byte[] key : namesMap.keySet()) {
+      boolean found = false;
+      for (CompBlockTrie t : tries) {
+        if (t.lookup(key) == namesMap.get(key)) {
+          found = true;
+        }
+      }
+      if (!found) {
+        throw new IllegalStateException("Trie doesn't match Map");
+      }
+    }
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+  public static class Bytes {
+    public final byte[] bytes;
+    
+    public Bytes(byte[] bytes) {
+      this.bytes = new byte[bytes.length];
+      System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+    }
+    
+    public boolean equals(Object o) {
+      if (o instanceof Bytes) {
+        return Arrays.equals(bytes, ((Bytes)o).bytes);
+      } else {
+        return false;
+      }
+    }
+    
+    public int hashCode() {
+      int hc = Arrays.hashCode(bytes);
+      return hc;
+    }
+  }
+
+  public static void testWithRandomFile(File file) throws Exception {
+    Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+    CompMemTrie namesTrie = new CompMemTrie();
+
+    System.out.println("Inserting random bytes from " + file);
+    FileChannel chan = new FileInputStream(file).getChannel();
+    ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+    long n = 0;
+    byte[] key;
+    // Set limit to 0 so initial compact works correctly.
+    buffer.clear().limit(0);
+    long _startInsert = System.currentTimeMillis();
+    long _aggregateL = 0;
+    long _avlL = 0;
+    long _aggregateS = 0;
+    long _avlS = 0;
+    int nL = 0;
+    int nS = 0;
+
+    while (n < 5000) {
+      if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+        break;
+      }
+      buffer.flip();
+
+      key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+      buffer.get(key);
+      Bytes keyBytes = new Bytes(key);
+      
+      if (namesMap.containsKey(keyBytes)) {
+//        namesTrie.insert(key, namesMap.get(keyBytes));
+      } else {
+        n++;
+        namesMap.put(keyBytes, n);
+        long _si = System.currentTimeMillis();
+        namesTrie.insert(key, n);
+        _aggregateL += System.currentTimeMillis() - _si;
+        _avlL += key.length;
+        nL++;
+
+        if (namesTrie.lookup(key) != n) {
+          throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+        }
+      }
+
+      for (int i = 0; i < 10; i++) {
+        key = new byte[((int)buffer.get()) & 0x000000FF];
+        buffer.get(key);
+        keyBytes = new Bytes(key);
+        if (namesMap.containsKey(keyBytes)) {
+//          namesTrie.insert(key, namesMap.get(keyBytes));
+        } else {
+          n++;
+          namesMap.put(keyBytes, n);
+          long _ss = System.currentTimeMillis();
+          namesTrie.insert(key, n);
+          _aggregateS += System.currentTimeMillis() - _ss;
+          _avlS += key.length;
+          nS++;
+
+          if (namesTrie.lookup(key) != n) {
+            throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+          }
+        }
+      }
+    }
+    long _endInsert = System.currentTimeMillis();
+    System.out.println("Input " + namesMap.size() + " entries");
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+    System.out.println(chan.position() + " bytes read from " + file);
+    chan.close();
+
+    long _startLookup = System.currentTimeMillis();
+    System.out.println("Checking random bytes from " + file);
+    for (Bytes k : namesMap.keySet()) {
+      int i = 0;
+      if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+        throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+      }
+      i++;
+    }
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+  public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+    Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+    CompMemTrie namesTrie = new CompMemTrie();
+
+    System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+    FileChannel chan = new FileInputStream(file).getChannel();
+    ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+    long n = 0;
+    byte[] key;
+    // Set limit to 0 so initial compact works correctly.
+    buffer.clear().limit(0);
+    long _startInsert = System.currentTimeMillis();
+    long _aggregateL = 0;
+    long _avlL = 0;
+    long _aggregateS = 0;
+    long _avlS = 0;
+    int nL = 0;
+    int nS = 0;
+
+    while (n < max) {
+      if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+        break;
+      }
+      buffer.flip();
+
+      key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+      buffer.get(key);
+      Bytes keyBytes = new Bytes(key);
+      
+      if (namesMap.containsKey(keyBytes)) {
+//        namesTrie.insert(key, namesMap.get(keyBytes));
+      } else {
+        n++;
+        namesMap.put(keyBytes, n);
+        long _si = System.currentTimeMillis();
+        namesTrie.insert(key, n);
+        _aggregateL += System.currentTimeMillis() - _si;
+        _avlL += key.length;
+        nL++;
+
+        if (namesTrie.lookup(key) != n) {
+          throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+        }
+      }
+
+      for (int i = 0; i < small; i++) {
+        key = new byte[((int)buffer.get()) & 0x000000FF];
+        buffer.get(key);
+        keyBytes = new Bytes(key);
+        if (namesMap.containsKey(keyBytes)) {
+//          namesTrie.insert(key, namesMap.get(keyBytes));
+        } else {
+          n++;
+          namesMap.put(keyBytes, n);
+          long _ss = System.currentTimeMillis();
+          namesTrie.insert(key, n);
+          _aggregateS += System.currentTimeMillis() - _ss;
+          _avlS += key.length;
+          nS++;
+
+          if (namesTrie.lookup(key) != n) {
+            throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+          }
+        }
+      }
+    }
+    long _endInsert = System.currentTimeMillis();
+    System.out.println("Input " + namesMap.size() + " entries");
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+    System.out.println(chan.position() + " bytes read from " + file);
+    chan.close();
+
+    long _startLookup = System.currentTimeMillis();
+    System.out.println("Checking random bytes from " + file);
+    for (Bytes k : namesMap.keySet()) {
+      int i = 0;
+      if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+        throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+      }
+      i++;
+    }
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+  public static void testLoadWithRandomFile(File file) throws Exception {
+    Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+    CompMemTrie namesTrie = new CompMemTrie();
+
+    System.out.println("Inserting random bytes from " + file);
+    FileChannel chan = new FileInputStream(file).getChannel();
+    ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+    long n = 0;
+    byte[] key;
+    // Set limit to 0 so initial compact works correctly.
+    buffer.clear().limit(0);
+    long _startInsert = System.currentTimeMillis();
+    long _aggregateL = 0;
+    long _avlL = 0;
+    int nL = 0;
+    long _aggregateLL = 0;
+    long _avlLL = 0;
+    int nLL = 0;
+    long _aggregateS = 0;
+    long _avlS = 0;
+    int nS = 0;
+    long _aggregateSS = 0;
+    long _avlSS = 0;
+    int nSS = 0;
+
+    for (int i = 0; i < 100; i++) {
+      if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+        break;
+      }
+      buffer.flip();
+
+      key = new byte[buffer.getInt() & 0x0007FFFF];
+      buffer.get(key);
+      Bytes keyBytes = new Bytes(key);
+      
+      if (namesMap.containsKey(keyBytes)) {
+        namesTrie.insert(key, namesMap.get(keyBytes));
+      } else {
+        n++;
+        namesMap.put(keyBytes, n);
+        long _sll = System.currentTimeMillis();
+        namesTrie.insert(key, n);
+        _aggregateLL += System.currentTimeMillis() - _sll;
+        _avlLL += key.length;
+        nLL++;
+
+        if (namesTrie.lookup(key) != n) {
+          throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+        }
+      }
+
+      for (int j = 0; j < 10; j++) {
+        key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+        buffer.get(key);
+        keyBytes = new Bytes(key);
+        if (namesMap.containsKey(keyBytes)) {
+          namesTrie.insert(key, namesMap.get(keyBytes));
+        } else {
+          n++;
+          namesMap.put(keyBytes, n);
+          long _sl = System.currentTimeMillis();
+          namesTrie.insert(key, n);
+          _aggregateL += System.currentTimeMillis() - _sl;
+          _avlL += key.length;
+          nL++;
+
+          if (namesTrie.lookup(key) != n) {
+            throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+          }
+        }
+
+        for (int k = 0; k < 10; k++) {
+          key = new byte[((int)buffer.get()) & 0x000000FF];
+          buffer.get(key);
+          keyBytes = new Bytes(key);
+          if (namesMap.containsKey(keyBytes)) {
+            namesTrie.insert(key, namesMap.get(keyBytes));
+          } else {
+            n++;
+            namesMap.put(keyBytes, n);
+            long _ss = System.currentTimeMillis();
+            namesTrie.insert(key, n);
+            _aggregateS += System.currentTimeMillis() - _ss;
+            _avlS += key.length;
+            nS++;
+
+            if (namesTrie.lookup(key) != n) {
+              throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+            }
+          }
+
+          for (int ii = 0; ii < 1000; ii++) {
+            key = new byte[((int)buffer.get()) & 0x0000001F];
+            buffer.get(key);
+            keyBytes = new Bytes(key);
+            if (namesMap.containsKey(keyBytes)) {
+              namesTrie.insert(key, namesMap.get(keyBytes));
+            } else {
+              n++;
+              namesMap.put(keyBytes, n);
+              long _sss = System.currentTimeMillis();
+              namesTrie.insert(key, n);
+              _aggregateS += System.currentTimeMillis() - _sss;
+              _avlSS += key.length;
+              nSS++;
+
+              if (namesTrie.lookup(key) != n) {
+                throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+              }
+            }
+          }
+        }
+      }
+    }
+    long _endInsert = System.currentTimeMillis();
+    System.out.println("Input " + namesMap.size() + " entries");
+    System.out.printf("  %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+    System.out.printf("  %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+    System.out.printf("  %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+    System.out.printf("  %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+    chan.close();
+
+    long _startLookup = System.currentTimeMillis();
+    System.out.println("Checking random bytes from " + file);
+    for (Bytes k : namesMap.keySet()) {
+      int i = 0;
+      if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+        throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+      }
+      i++;
+    }
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+}

Modified: projects/xa2/object-pool/src/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrie.java	2008-04-23 06:43:53 UTC (rev 823)
+++ projects/xa2/object-pool/src/CompMemTrie.java	2008-04-23 06:47:18 UTC (rev 824)
@@ -4,6 +4,9 @@
  * Date 15th April 2008
  */
 
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
 import java.util.Arrays;
 
 /**
@@ -11,22 +14,22 @@
  */
 public class CompMemTrie {
   @SuppressWarnings("serial")
-  public static class CompMemTrieException extends Exception {};
-  public static class NotFound extends CompMemTrieException {};
-  public static class InsufficientSpace extends CompMemTrieException {};
+  public static class NotFound extends Exception {};
+  public static class InsufficientSpace extends Exception
+      { public InsufficientSpace(String s) { super(s); } };
 
   static byte TYPE_BRANCH_TERM = 0x01;   // Indicates a trie branch with a termination entry..
   static byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
   static byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
 
-  private TrieNode root;
+  protected TrieNode root;
   public static int MAGIC = 0x00010001;
 
   public CompMemTrie() {
     this.root = null;
   }
 
-  public void insert(byte[] key, long value) {
+  public void insert(byte[] key, long value) throws InsufficientSpace {
     if (root == null) {
       root = new TrieLeaf(key, value);
     } else {
@@ -46,21 +49,17 @@
     return root.lookup(key);
   }
 
-  public void write(ByteBuffer index, ByteBuffer data) {
+  public void write(ByteBuffer index, FileChannel data) throws InsufficientSpace, IOException {
     if (root == null) {
       throw new IllegalStateException("Attempt to write empty trie");
     }
     int indexreq = root.totalIndexSize() + 4 + 2;  // + sizeof(MAGIC) + sizeof(ptr_to_root)
-    if (indexreq > index.hasRemaining()) {
-      throw new InsufficientSpaceException("Attempt to write trie index to bytebuffer with insufficient space");
+    if (indexreq > index.remaining()) {
+      throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
     }
     if (indexreq > 0x00010000) {
-      throw new InsufficientSpaceException("Attempt to write trie index larger than 64K");
+      throw new InsufficientSpace("Attempt to write trie index larger than 64K");
     }
-    int datareq = root.totalDataSize();
-    if (datareq > data.hasRemaining()) {
-      throw new InsufficientSpaceException("Attempt to write trie data to bytebuffer with insufficient space");
-    }
 
     index.putInt(MAGIC);
     root.write(index, data);
@@ -68,7 +67,7 @@
   }
 
 
-  private abstract static class TrieNode {
+  protected abstract static class TrieNode {
     @SuppressWarnings("serial")
     protected static class InsertAbove extends Exception {} ;
     protected static final InsertAbove cont = new InsertAbove();
@@ -95,7 +94,7 @@
      * The data buffer is the target of the byte[]/value pairs stored in the leaves.  We want to keep these
      * separate for caching purposes.
      */
-    protected abstract void write(ByteBuffer index, ByteBuffer data);
+    protected abstract void write(ByteBuffer index, FileChannel data) throws IOException;
     protected abstract int totalIndexSize();
     protected abstract int totalDataSize();
     protected abstract void insert(TrieLeaf node, int parentLcd) throws InsertAbove;
@@ -161,15 +160,15 @@
       return total;
     }
 
-    protected void write(ByteBuffer index, ByteBuffer data) {
+    protected void write(ByteBuffer index, FileChannel data) throws IOException {
       for (TrieNode child : children) {
         child.write(index, data);
       }
 
-      location = (short)bb.position();
+      location = (short)index.position();
       
-      index.putByte((term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
-      index.putByte(aLeaf.key[offset]);
+      index.put((term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
+      index.put((byte)0x00);  // Padding to keep things short-aligned.
       index.putInt(offset);
       if (term != null) {
         index.putShort(term.location);
@@ -324,15 +323,17 @@
       return 8 + 4 + key.length;
     }
 
-    protected void write(ByteBuffer index, ByteBuffer data) {
+    protected void write(ByteBuffer index, FileChannel data) throws IOException {
+      ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + key.length);
       keyLocation = data.position();
-      data.putLong(value);
-      data.putInt(key.length);
-      data.put(key);
+      dataBuf.putLong(value);
+      dataBuf.putInt(key.length);
+      dataBuf.put(key);
+      data.write(dataBuf);
 
-      location = (short)index.position;
-      index.putByte(TYPE_LEAF);
-      index.putByte(0x00); // Pad to 16-bit aligned.
+      location = (short)index.position();
+      index.put(TYPE_LEAF);
+      index.put((byte)0x00); // Pad to 16-bit aligned.
       index.putLong(keyLocation);
     }
 




More information about the Mulgara-svn mailing list