[Mulgara-svn] r914 - in projects/xa2/object-pool/src: data scheduler trie

andrae at mulgara.org andrae at mulgara.org
Mon May 12 06:45:18 UTC 2008


Author: andrae
Date: 2008-05-11 23:45:18 -0700 (Sun, 11 May 2008)
New Revision: 914

Added:
   projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak
   projects/xa2/object-pool/src/scheduler/FileHandleCMap.java
   projects/xa2/object-pool/src/trie/MemoryTrieTest.java
Modified:
   projects/xa2/object-pool/src/data/DataEntry.java
   projects/xa2/object-pool/src/data/DataFile.java
   projects/xa2/object-pool/src/scheduler/FileHandle.java
   projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
   projects/xa2/object-pool/src/scheduler/IOScheduler.java
   projects/xa2/object-pool/src/trie/ByteMap.java
   projects/xa2/object-pool/src/trie/DiskTrie.java
   projects/xa2/object-pool/src/trie/MemoryTrie.java
   projects/xa2/object-pool/src/trie/OnesTable.java
Log:
Several changes.

1. Migrated into multi-package structure.
2. Started basic IOScheduler---this will need to eventually become a much less basic scheduler.
3. Abstracted data-file format into separate class.
4. Migrated Trie structures to use new data-entry class.
5. Wrote tests for MemoryTrie based on CompMemTrie.
6. Deprecated Comp*Trie classes.
7. Added FileHandle class.  Major focus of this will be tracking IO caching failures and logical seeks.



Modified: projects/xa2/object-pool/src/data/DataEntry.java
===================================================================
--- projects/xa2/object-pool/src/data/DataEntry.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/data/DataEntry.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -3,110 +3,189 @@
  * Author Andrae Muys
  * Date 6th May 2008
  */
+package data;
 
 import java.io.IOException;
+import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
 import java.nio.channels.FileChannel;
 import java.util.HashMap;
 import java.util.Map;
 
+//import scheduler.Block;
+//import scheduler.FileHandle;
+
 /**
  * Abstracts the concept of a data file entry.
  */
 public abstract class DataEntry {
+  protected static final int HEADER_LENGTH = 12;
+
   protected long value;
 
-  public abstract int size();
+  public abstract int dataSize();
 
-  public abstract int totalSize() {
-    return 8 + 4 + size(); // VALUE + LENGTH + DATA
-  }
-
   public abstract void write(FileChannel chan) throws IOException;
   public abstract DataEntry canonicalize();
 
-  public abstract void rewind();
-  public abstract byte get(int position) throws IOException;
-  public abstract byte get() throws IOException;
+  public abstract DataEntry rewind();
+  public abstract ByteBuffer slice(int offset, int length);
+  public abstract byte get(int position) throws BufferUnderflowException;
+  public abstract byte get() throws BufferUnderflowException;
 
+  public int totalSize() {
+    return HEADER_LENGTH + dataSize(); // VALUE + LENGTH + DATA
+  }
+
   public long getValue() {
     return value;
   }
 
+  public static DataEntry getEntry(byte[] data, long value) {
+    return new MemoryBackedDataEntry(value, data, false);
+  }
+/*
   protected static class DiskBackedDataEntry extends DataEntry {
     private FileHandle handle;
     private long position;
-    private ByteBuffer entry;
     private int length;
 
     // FIXME: Need to replace this with a background populated ConcurrentLinkedQueue.
     private Block[] blocks;
 
-    private Block currBlock;
+    private ByteBuffer currBuffer;
+    private int currBlock;
+    private int currPosition;
 
-
     DiskBackedDataEntry(FileHandle handle, long position) {
       this.handle = handle;
       this.position = position;
-      this.curr = 0;
 
-      this.currBlock = handle.readBlock(position);
-      this.entry = currBlock.offset(position & (BLOCK_SIZE - 1));  // block size is a power of 2 therefore mask is size - 1.
+      Block block = handle.readBlock(position);
+      this.currBlock = 0;
+      this.currBuffer = currBlock.offset(position & (BLOCK_SIZE - 1));  // block size is a power of 2 therefore mask is size - 1.
+      this.currPosition = 0;
 
-      this.value = entry.getLong();
-      this.length = entry.getInt();
+      this.value = currBuffer.getLong();
+      this.length = currBuffer.getInt();
 
-      if (length <= entry.remaining()) {
-        entry.limit(length);
-        blocks[] = new Block[] { block };
-      } else {
-        entry.limit(entry.capacity());
-        int remaining = length - entry.remaining();
-        int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
-        blocks[] = new Block[remainingBlocks];
-        blocks[0] = block;
+      currBuffer.mark();
+      // Set the buffer to the entire block for length calculations.
+      currBuffer.limit(currBuffer.capacity());
+      int remaining = length - currBuffer.remaining();
+      int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
+      blocks = new Block[totalBlocks];
+      blocks[0] = block;
+      if (totalBlocks == 1) {
+        // [mark,length] establishes the data covered by this entry.
+        currBuffer.limit(length);
       }
+      // FIXME: We eventually want to submit the remaining blocks to the scheduler for speculative background
+      // fetch.
     }
 
+    /**
+     * Convert 1 block entries into memory-backed entries.
+     *
+     * This makes the block's memory recoverable should we require it, and as the object-pool is subject to a
+     * large number of stabbing-queries this is a good thing.  Should we scan the pool, the block is likely to
+     * still be cached so we shouldn't need to reload it; the only cost of canonicalizing in this case is an
+     * unnecessary copy.  However releasing a 32K-2M block in preference to a 20-30byte array is preferable if
+     * we are doing a stabbing query.
+     * Especially as a DiskBackedDataEntry requires 48-bytes anyway vs. 12 for a MemoryBackedDataEntry.
+     * /
     public DataEntry canonicalize() {
       if (blocks.length == 1) {
-        byte[] data = new byte[entry.reset().remaining()];
-        entry.get(data);
+        byte[] data = new byte[currBuffer.reset().remaining()];
+        currBuffer.get(data);
         return new MemoryBackedDataEntry(value, data);
       } else {
         return this;
       }
     }
 
-    public int size() {
+    public int dataSize() {
       return length;
     }
 
-    public void rewrind() {
-      curr = 0;
+    public DataEntry rewind() {
+      if (currBlock == 0) {
+        currBuffer.rewind();
+      } else {
+        currBlock = 0;
+        // block size is a power of 2 therefore mask is size - 1.
+        currBuffer = blocks[0].offset((position & (BLOCK_SIZE - 1)) + HEADER_LENGTH);
+        if (totalBlocks == 1) {
+          // [mark,length] establishes the data covered by this entry.
+          // Note: This should never occur as we expect DataEntries to be canonicalized, which would result in
+          // a 1 block entry being replaced by a MemoryBackedDataEntry.
+          currBuffer.limit(length);
+        } else {
+          currBuffer.limit(currBuffer.capacity());
+        }
+      }
+
+      return this;
     }
 
     public byte get() throws IOException {
-      byte b = get(curr);
-      curr++;
-      return b;
+      if (currBuffer.hasRemaining()) {
+        currPosition++;
+        return currBuffer.get();
+      } else {
+        return get(currPosition);
+      }
     }
 
     public byte get(int off) {
-      int bN = (off + 12) / BLOCK_SIZE;
-      if (blocks[bN] == null) {
-        blocks[bN] = handle.readBlock(off + 12);
+      if (off < 0) {
+        throw new BufferUnderflowException("Attempt to use -ve offset");
       }
+      if (off >= length) {
+        throw new BufferOverflowException("Attempt to use offset > length");
+      }
 
-        
+      currBlock = (off + HEADER_LENGTH) / BLOCK_SIZE;
+      if (blocks[currBlock] == null) {
+        blocks[currBlock] = handle.readBlock(position + currBlock * BLOCKSIZE);
+      }
+      currBuffer = blocks[currBlock].offset(position & (BLOCK_SIZE - 1));  // block size is a power of 2 therefore mask is size - 1.
+
+      // Allow for header.
+      if (currBlock == 0) {
+        currBuffer.position(HEADER_LENGTH);
+      }
+      // Allow for partial final block.
+      if (currBlock == blocks.length - 1) {
+        currBuffer.limit(length % BLOCK_SIZE + (position & (BLOCKSIZE - 1)));
+      }
+
+      currPosition = off;
+
+      return get();
     }
 
     public void write(FileChannel chan) throws IOException {
-      handle.transferTo(chan, position, 12+length);
+      handle.transferTo(chan, position, HEADER_LENGTH + length);
     }
+
+    /**
+     * FIXME: We really need to return our own ByteBuffer implementation here that exploits lazy loading.
+     *
+     * Note this is a really really inefficient implementation.
+     * /
+    public ByteBuffer slice(int offset, int length) throws IOException {
+      ByteBuffer buffer = ByteBuffer.allocate(length);
+      buffer.put(get(offset));
+      for (int i = 1; i < length; i++) {
+        buffer.put(get());
+      }
+
+      return buffer;
+    }
   }
-
+*/
   protected static class MemoryBackedDataEntry extends DataEntry {
     private ByteBuffer data;
 
@@ -125,7 +204,22 @@
       }
     }
 
-    public int size() {
+    public byte get() throws BufferUnderflowException {
+      return data.get();
+    }
+
+    public byte get(int offset) throws BufferUnderflowException {
+      data.position(offset);
+      return data.get(offset);
+    }
+
+    public DataEntry rewind() {
+      data.rewind();
+
+      return this;
+    }
+
+    public int dataSize() {
       return data.capacity();
     }
 
@@ -134,12 +228,39 @@
     }
 
     public void write(FileChannel chan) throws IOException {
-      ByteBuffer bb = ByteBuffer.allocateDirect(12 + data.length);
+      ByteBuffer bb = ByteBuffer.allocate(HEADER_LENGTH);
       bb.clear();
       bb.putLong(value);
-      bb.putInt(data.length);
-      bb.put(data);
-      chan.write((ByteBuffer)bb.flip());
+      bb.putInt(data.capacity());
+      chan.write(new ByteBuffer[] { (ByteBuffer)bb.flip(), (ByteBuffer)bb.duplicate().clear() });
     }
+
+    public ByteBuffer slice(int offset, int length) {
+      ByteBuffer bb = data.duplicate();
+      bb.position(offset);
+      bb = bb.slice();
+      bb.limit(length);
+
+      return bb;
+    }
+
+    public boolean equals(Object rhs) {
+      if (rhs == this) {
+        return true;
+      }
+      if (rhs.getClass().equals(getClass())) {
+        MemoryBackedDataEntry mbde = (MemoryBackedDataEntry)rhs;
+        return value == mbde.value && data.duplicate().clear().equals(mbde.data.duplicate().clear());
+      } else {
+        // FIXME: Do I need to consider comparing with DiskBackedDataEntry here?
+        return false;
+      }
+    }
+
+    public int hashCode() {
+      // Note: If I do consider DBDE's equal to MBDE keeping it hashcode compatible will be all but
+      // impossible.  I doubt I want to do the linear hashCode computation required for a DBDE.
+      return Long.valueOf(value).hashCode() ^ data.duplicate().clear().hashCode();
+    }
   }
 }

Modified: projects/xa2/object-pool/src/data/DataFile.java
===================================================================
--- projects/xa2/object-pool/src/data/DataFile.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/data/DataFile.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -3,6 +3,7 @@
  * Author Andrae Muys
  * Date 1st May 2008
  */
+package data;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -21,11 +22,11 @@
 
   private FileHandle handle;
 
-  public DataFile(File file, IOScheduler scheduler) {
-    this.handle = scheduler.open(file, MAGIC, LOG2_BLOCK_SIZE);
+  public DataFile(String filename, IOScheduler scheduler) {
+    this.handle = scheduler.open(filename, MAGIC, LOG2_BLOCK_SIZE);
   }
 
   public DataEntry getEntry(long position) throws IOException {
-    return new DataEntry(this, position);
+    return new DiskBackedDataEntry(handle, position).canonicalize();
   }
 }

Copied: projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak (from rev 891, projects/xa2/object-pool/src/scheduler/IOScheduler.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak	                        (rev 0)
+++ projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak	2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,35 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.lang.ref.SoftReference;
+import java.util.HashMap;
+import java.util.Map;
+
+public class IOScheduler {
+  private Map<FileHandle, FileCache> fileCache;
+  private File[] disks;
+  private FileHandleCMap fileMap;
+
+  public IOScheduler(File[] disks) {
+    this.disks = disks;
+    fileCache = new HashMap<FileHandle, FileCache>();
+  }
+
+  public FileHandle newFile(File file) {
+    if (file.exists()) {
+      throw new IllegalArgumentException("File exists: " + file);
+    }
+    fileMap.put(file, new FileHandle(file));
+  }
+
+  public FileHandle openFile(File file) {
+    if (!file.exists()) {
+      throw new IllegalArgumentException("File does not exist: " + file);
+    }
+    fileMap.put(file, new FileHandle(file));
+  }
+}

Modified: projects/xa2/object-pool/src/scheduler/FileHandle.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandle.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/FileHandle.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -6,34 +6,71 @@
 package scheduler;
 
 import java.io.File;
+import java.nio.channels.FileChannel;
 
 public class FileHandle {
   private File file;
   private FileChannel channel;
+  private int blocksize;
 
   private long seeks;
 
-  FileHandle(File file) {
+  FileHandle(File file, FileChannel channel, int magic, int blocksize) {
     this.file = file;
+    this.channel = channel;
+    this.blocksize = blocksize;
     this.seeks = 0;
+  }
 
-    this.channel = file.exists() ?
-        new FileInputStream(file).getChannel() :
-        new FileOutputStream(file).getChannel();
+  FileHandle(File file, FileInputStream stream, int magic, int blocksize) {
+    this(file, stream.getChannel(), magic, blocksize);
+
+    ByteBuffer bb = ByteBuffer.allocate(blocksize);
+    bb.putInt(magic);
+    bb.putInt(blocksize);
+    bb.position(0);
+    bb.limit(bb.capacity());
+
+    channel.write(bb);
   }
 
+  FileHandle(File file, FileOutputStream stream, int magic, int blocksize) {
+    this(file, stream.getChannel(), magic, blocksize);
+
+    ByteBuffer bb = ByteBuffer.allocate(blocksize);
+    this.channel.read(bb);
+
+    bb.clear();
+    int filemagic = bb.getInt();
+    if (filemagic != magic) {
+      bb.order(bb.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+      bb.clear();
+      filemagic = bb.getInt();
+      if (filemagic == magic) {
+        throw new IllegalArgumentException("Attempt to read file using incorrect endian format");
+      } else {
+        throw new IllegalArgumentException("Bad magic in index buffer: " + filemagic + ", MAGIC=" + magic);
+      }
+    }
+
+    int filebs = bb.getInt();
+    if (filebs != blocksize) {
+      throw new IllegalArgumentException("Attempt to read file(" + file + ") using incorrect blocksize: " + blocksize);
+    }
+  }
+
   public File getFile() {
     return file;
   }
 
-  Block readBlock(Long blockId, Block block) {
+  Block readBlock(Long blockId) {
     long position = blockId.longValue();
     if (channel.position() != position) {
       seeks++;
       channel.position(position);
     }
 
-    channel.read(block.prepare(blockId));
+    channel.read(new Block().block.prepare(blockId));
 
     return block;
   }
@@ -42,5 +79,7 @@
     Long position = new Long(channel.position());
 
     channel.write(block.prepare(blockId));
+
+    return position;
   }
 }

Copied: projects/xa2/object-pool/src/scheduler/FileHandleCMap.java (from rev 891, projects/xa2/object-pool/src/scheduler/FileHandleFactory.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleCMap.java	                        (rev 0)
+++ projects/xa2/object-pool/src/scheduler/FileHandleCMap.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,30 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A canonical mapping of File objects to FileHandles.
+ */
+public class FileHandleCMap {
+  private Map<File, FileHandle> handles;
+
+  public FileHandleCMap() {
+    this.handles = new HashMap<File, FileHandle>();
+  }
+
+  public FileHandle getHandle(File file) {
+    FileHandle result = handles.get(file);
+    if (result == null) {
+      result = new FileHandle(file, this);
+      handles.put(result);
+    }
+    return result;
+  }
+}

Modified: projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleFactory.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/FileHandleFactory.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -6,25 +6,34 @@
 package scheduler;
 
 import java.io.File;
-import java.util.HashMap;
-import java.util.Map;
 
 /**
  * A canonical mapping of File objects to FileHandles.
  */
-public class FileHandleCMap {
-  private Map<File, FileHandle> handles;
+public class FileHandleFactory {
+  public FileHandleFactory() {}
 
-  public FileHandleCMap() {
-    this.handles = new HashMap<File, FileHandle>();
+  public FileHandle createNew(File file, int magic, int blocksize) {
+    if (file.exists()) {
+      throw new IllegalArgumentException("File exists: " + file);
+    }
+
+    return new FileHandle(new FileInputStream(file), magic, blocksize);
   }
 
-  public FileHandle getHandle(File file) {
-    FileHandle result = handles.get(file);
-    if (result == null) {
-      result = new FileHandle(file, this);
-      handles.put(result);
+  public FileHandle openExisting(File file, int magic, int blocksize) {
+    if (!file.exists()) {
+      throw new IllegalArgumentException("File does not exist: " + file);
     }
-    return result;
+
+    return new FileHandle(new FileOutputStream(file), magic, blocksize);
   }
+
+  public FileHandle open(File file, int magic, int blocksize) {
+    if (file.exists()) {
+      return openExisting(file, magic, blocksize);
+    } else {
+      return openNew(file, magic, blocksize);
+    }
+  }
 }

Modified: projects/xa2/object-pool/src/scheduler/IOScheduler.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/IOScheduler.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/IOScheduler.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -5,31 +5,28 @@
  */
 package scheduler;
 
-import java.lang.ref.SoftReference;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.File;
 
 public class IOScheduler {
-  private Map<FileHandle, FileCache> fileCache;
-  private File[] disks;
-  private FileHandleCMap fileMap;
+  private FileHandleFactory fileHandleFactory;
+  private File dir;
 
-  public IOScheduler(File[] disks) {
-    this.disks = disks;
-    fileCache = new HashMap<FileHandle, FileCache>();
-  }
-
-  public FileHandle newFile(File file) {
-    if (file.exists()) {
-      throw new IllegalArgumentException("File exists: " + file);
+  public IOScheduler(File dir, FileHandleFactory fileHandleFactory) {
+    this.fileHandleFactory = fileHandleFactory;
+    this.dir = dir;
+    if (dir.exists()) {
+      if (!dir.isDirectory()) {
+        throw new IllegalArgumentException("Scheduler argument not directory: " + dir);
+      }
     }
-    fileMap.put(file, new FileHandle(file));
+    if (!dir.mkdirs()) {
+      throw new IllegalStateException("Failed to make directory: " + dir);
+    }
   }
 
-  public FileHandle openFile(File file) {
-    if (!file.exists()) {
-      throw new IllegalArgumentException("File does not exist: " + file);
-    }
-    fileMap.put(file, new FileHandle(file));
+  public FileHandle open(String name, int magic, int blocksize) {
+    File file = new File(dir, name);
+
+    return fileHandleFactory.open(file, int, blocksize);
   }
 }

Modified: projects/xa2/object-pool/src/trie/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMap.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/ByteMap.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -3,6 +3,7 @@
  * Author Andrae Muys
  * Date 8th April 2008
  */
+package trie;
 
 import static trie.OnesTable.*;
 

Modified: projects/xa2/object-pool/src/trie/DiskTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrie.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/DiskTrie.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -262,7 +262,7 @@
     index.get();  // skip padding.
     long keyLocation = index.getLong();
 
-    DataEntry entry = dataFile.getEntry(keyLocation).canonicalize();
+    DataEntry entry = dataFile.getEntry(keyLocation);
 
     return new TrieLeaf(entry, false);
   }

Modified: projects/xa2/object-pool/src/trie/MemoryTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrie.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/MemoryTrie.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -3,19 +3,20 @@
  * Author Andrae Muys
  * Date 15th April 2008
  */
+package trie;
 
-import java.util.Arrays;
+import data.DataEntry;
 
 /**
  * Implements an in-memory trie - uses ByteMaps to implement trie nodes.
  */
-public class CompMemTrie {
+public class MemoryTrie {
   @SuppressWarnings("serial")
   public static class NotFound extends Exception {};
   private static NotFound notfound = new NotFound();
 
   protected TrieNode root;
-  public CompMemTrie() {
+  public MemoryTrie() {
     this.root = null;
   }
 
@@ -72,7 +73,7 @@
       if (lhsOff < 0 || rhsOff < 0) {
         return false;
       }
-      if (lhs.size() < lhsOff + len || rhs.size() < rhsOff + len) {
+      if (lhs.dataSize() < lhsOff + len || rhs.dataSize() < rhsOff + len) {
         return false;
       }
 
@@ -92,12 +93,12 @@
     public TrieBranch(DataEntry entry) {
       this.children = new ByteMap<TrieNode>();
       this.term = new TrieLeaf(entry);
-      this.offset = term.entry.size();
+      this.offset = term.entry.dataSize();
       this.aLeaf = this.term;
     }
 
-    public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
-      this(oldRoot, new TrieLeaf(key, value));
+    public TrieBranch(TrieNode oldRoot, DataEntry entry) {
+      this(oldRoot, new TrieLeaf(entry));
     }
 
     private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
@@ -116,8 +117,9 @@
       DataEntry lhs = oldNode.aLeaf.entry.rewind();
       DataEntry rhs = newNode.entry.rewind();
 
+      // FIXME: Replace this loop with a compareTo.
       int i = 0;
-      while (i < lhs.size() && i < rhs.size()) {
+      while (i < lhs.dataSize() && i < rhs.dataSize()) {
         byte lb = lhs.get();
         byte rb = rhs.get();
         if (lb != rb) {
@@ -130,11 +132,11 @@
         i++;
       }
 
-      if (i < lhs.size()) {
+      if (i < lhs.dataSize()) {
         offset = i;
         children.insert(lhs.get(), oldNode);
         term = newNode;
-      } else if (i < rhs.size()) {
+      } else if (i < rhs.dataSize()) {
         if (oldNode instanceof TrieLeaf) {
           offset = i;
           children.insert(rhs.get(), newNode);
@@ -152,7 +154,7 @@
         return false;
       } else {
         // new node matches the lcp of this node.
-        if (node.entry.size() == offset) {
+        if (node.entry.dataSize() == offset) {
           // new node is expected to terminate here.
           if (term == null) {
             term = node;
@@ -168,7 +170,7 @@
           } else {
             // there is an existing child node branching on this branching key.
             if (!child.insert(node, offset)) {
-              children.insert(node.entry(offset), new TrieBranch(child, node));
+              children.insert(node.entry.get(offset), new TrieBranch(child, node));
             }
           }
         }
@@ -185,7 +187,7 @@
       } else {
         // new node matches the lcp of this node.
         TrieNode child;
-        if (key.size() == offset) {
+        if (key.dataSize() == offset) {
           // new node is expected to terminate here.
           if (term != null) {
             valid[0] = true;
@@ -221,9 +223,9 @@
     }
 
     protected boolean insert(TrieLeaf node, int parentLcp) {
-      if (entry.size() != node.entry.size()) {
+      if (entry.dataSize() != node.entry.dataSize()) {
         return false;
-      } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, key.size() - parentLcp)) {
+      } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, node.entry.dataSize() - parentLcp)) {
         return false;
       } else if (entry.getValue() == node.entry.getValue()) {
         return true; // Duplicate key/value pair.
@@ -233,13 +235,13 @@
     }
 
     protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
-      assert regionMatches(this.entry, 0, key, 0, key.size());
+      assert regionMatches(this.entry, 0, key, 0, key.dataSize());
       valid[0] = true;
-      return value;
+      return entry.getValue();
     }
     
     public String toString() {
-      return "Trie-LEAF: " + entry + " -> " + value;
+      return "Trie-LEAF: " + entry;
     }
   }
 }

Copied: projects/xa2/object-pool/src/trie/MemoryTrieTest.java (from rev 872, projects/xa2/object-pool/src/trie/CompMemTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrieTest.java	                        (rev 0)
+++ projects/xa2/object-pool/src/trie/MemoryTrieTest.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,413 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th May 2008
+ */
+package trie;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+import data.DataEntry;
+
+/**
+ * Basic tests of the MemoryTrie
+ */
+public class MemoryTrieTest {
+  public static void main(String[] args) throws Exception {
+    testWithFile(new File("../scratch/propernames.uniq"), 0);
+    testWithFile(new File("../scratch/propernames.uniq"), 1);
+    testWithFile(new File("../scratch/propernames.uniq"), 10);
+    testWithFile(new File("../scratch/propernames.uniq"), 100);
+    testWithFile(new File("../scratch/propernames.uniq"), 1000);
+    testWithFile(new File("../scratch/propernames.uniq"), 10000);
+    testWithFile(new File("../scratch/connectives.uniq"), 10000);
+    testWithFile(new File("../scratch/web2a.uniq"), 100000);
+    testWithFile(new File("../scratch/web2.uniq"), 1000000);
+    testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+    testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+    testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+    testWithRandomFile(new File("/dev/urandom"));
+    testLoadWithRandomFile(new File("/dev/urandom"));
+  }
+
+
+  /**
+   * Inserts up to max newline-delimited keys from file into a MemoryTrie
+   * (value = insertion order) and verifies every inserted entry looks up
+   * to its own value.
+   */
+  public static void testWithFile(File file, int max) throws Exception {
+    Set<DataEntry> namesSet = new HashSet<DataEntry>();
+    MemoryTrie namesTrie = new MemoryTrie();
+
+    System.out.println("Inserting lines from " + file);
+    BufferedReader names = new BufferedReader(new FileReader(file));
+    long n = 0;
+    long _startInsert;
+    long _endInsert;
+    try {
+      String name = names.readLine();
+      _startInsert = System.currentTimeMillis();
+      while (name != null && n < max) {
+        DataEntry entry = DataEntry.getEntry(name.getBytes(), n);
+        namesSet.add(entry);
+        namesTrie.insert(entry);
+        name = names.readLine();
+        n++;
+      }
+      _endInsert = System.currentTimeMillis();
+    } finally {
+      names.close();  // FIX: reader was leaked if insert threw.
+    }
+
+    System.out.println("Checking " + n + " lines from " + file);
+    long _startLookup = System.currentTimeMillis();
+    for (DataEntry key : namesSet) {
+      if (namesTrie.lookup(key) != key.getValue()) {
+        throw new IllegalStateException("Trie doesn't match Map");
+      }
+    }
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+
+  /** Value-semantics wrapper around byte[] so raw keys can key a HashMap. */
+  public static class Bytes {
+    public final byte[] bytes;
+    
+    public Bytes(byte[] bytes) {
+      // Defensive copy: callers reuse their key arrays between records.
+      this.bytes = new byte[bytes.length];
+      System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+    }
+    
+    public boolean equals(Object o) {
+      return (o instanceof Bytes) && Arrays.equals(bytes, ((Bytes)o).bytes);
+    }
+    
+    public int hashCode() {
+      return Arrays.hashCode(bytes);
+    }
+  }
+
+  /**
+   * Historic entry point; identical to testWithRandomFileTuned with
+   * max=5000 records and 10 short keys per long key.  The original
+   * duplicated the tuned method's body verbatim.
+   */
+  public static void testWithRandomFile(File file) throws Exception {
+    testWithRandomFileTuned(file, 5000, 10);
+  }
+
+
+  /**
+   * Inserts up to max unique random keys from file.  Each outer iteration
+   * reads one "long" key (16-bit length prefix, up to 64KB) followed by
+   * small "short" keys (8-bit length prefix, up to 255 bytes), verifying
+   * each lookup immediately and the whole map again at the end.
+   */
+  public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+    Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+    MemoryTrie namesTrie = new MemoryTrie();
+
+    System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+    FileChannel chan = new FileInputStream(file).getChannel();
+    ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+    long n = 0;
+    byte[] key;
+    // Set limit to 0 so initial compact works correctly.
+    buffer.clear().limit(0);
+    long _startInsert = System.currentTimeMillis();
+    long _aggregateL = 0;
+    long _avlL = 0;
+    long _aggregateS = 0;
+    long _avlS = 0;
+    int nL = 0;
+    int nS = 0;
+
+    while (n < max) {
+      // Refill when fewer than 67KB remain: enough for one max-size long
+      // key plus the short keys that follow it.
+      if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+        break;
+      }
+      buffer.flip();
+
+      key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+      buffer.get(key);
+      Bytes keyBytes = new Bytes(key);
+      
+      if (!namesMap.containsKey(keyBytes)) {
+        n++;
+        namesMap.put(keyBytes, n);
+        long _si = System.currentTimeMillis();
+        namesTrie.insert(DataEntry.getEntry(key, n));
+        _aggregateL += System.currentTimeMillis() - _si;
+        _avlL += key.length;
+        nL++;
+
+        if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+          throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+        }
+      }
+
+      for (int i = 0; i < small; i++) {
+        key = new byte[((int)buffer.get()) & 0x000000FF];
+        buffer.get(key);
+        keyBytes = new Bytes(key);
+        if (!namesMap.containsKey(keyBytes)) {
+          n++;
+          namesMap.put(keyBytes, n);
+          long _ss = System.currentTimeMillis();
+          namesTrie.insert(DataEntry.getEntry(key, n));
+          _aggregateS += System.currentTimeMillis() - _ss;
+          _avlS += key.length;
+          nS++;
+
+          if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+            throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+          }
+        }
+      }
+    }
+    long _endInsert = System.currentTimeMillis();
+    System.out.println("Input " + namesMap.size() + " entries");
+    printStats("long", nL, _avlL, _aggregateL);
+    printStats("short", nS, _avlS, _aggregateS);
+    System.out.println(chan.position() + " bytes read from " + file);
+    chan.close();
+
+    long _startLookup = System.currentTimeMillis();
+    System.out.println("Checking random bytes from " + file);
+    checkAll(namesTrie, namesMap);
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+  /**
+   * Stress load: nested loops insert keys in four size classes - very long
+   * (19-bit length prefix), long (16-bit), short (8-bit) and very short
+   * (5-bit) - tracking insertion timings per class.
+   */
+  public static void testLoadWithRandomFile(File file) throws Exception {
+    Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+    MemoryTrie namesTrie = new MemoryTrie();
+
+    System.out.println("Inserting random bytes from " + file);
+    FileChannel chan = new FileInputStream(file).getChannel();
+    ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+    long n = 0;
+    byte[] key;
+    // Set limit to 0 so initial compact works correctly.
+    buffer.clear().limit(0);
+    long _startInsert = System.currentTimeMillis();
+    long _aggregateL = 0;
+    long _avlL = 0;
+    int nL = 0;
+    long _aggregateLL = 0;
+    long _avlLL = 0;
+    int nLL = 0;
+    long _aggregateS = 0;
+    long _avlS = 0;
+    int nS = 0;
+    long _aggregateSS = 0;
+    long _avlSS = 0;
+    int nSS = 0;
+
+    for (int i = 0; i < 100; i++) {
+      // NOTE(review): one outer iteration can consume well over the 2MB
+      // refill threshold (up to 512KB + 10*64KB + 100*255B + 10000*31B),
+      // so a BufferUnderflowException is possible - confirm intended.
+      if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+        break;
+      }
+      buffer.flip();
+
+      key = new byte[buffer.getInt() & 0x0007FFFF];
+      buffer.get(key);
+      Bytes keyBytes = new Bytes(key);
+      
+      if (!namesMap.containsKey(keyBytes)) {
+        n++;
+        namesMap.put(keyBytes, n);
+        long _sll = System.currentTimeMillis();
+        namesTrie.insert(DataEntry.getEntry(key, n));
+        _aggregateLL += System.currentTimeMillis() - _sll;
+        _avlLL += key.length;
+        nLL++;
+
+        if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+          throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+        }
+      }
+
+      for (int j = 0; j < 10; j++) {
+        key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+        buffer.get(key);
+        keyBytes = new Bytes(key);
+        if (!namesMap.containsKey(keyBytes)) {
+          n++;
+          namesMap.put(keyBytes, n);
+          long _sl = System.currentTimeMillis();
+          namesTrie.insert(DataEntry.getEntry(key, n));
+          _aggregateL += System.currentTimeMillis() - _sl;
+          _avlL += key.length;
+          nL++;
+
+          if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+            throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+          }
+        }
+
+        for (int k = 0; k < 10; k++) {
+          key = new byte[((int)buffer.get()) & 0x000000FF];
+          buffer.get(key);
+          keyBytes = new Bytes(key);
+          if (!namesMap.containsKey(keyBytes)) {
+            n++;
+            namesMap.put(keyBytes, n);
+            long _ss = System.currentTimeMillis();
+            namesTrie.insert(DataEntry.getEntry(key, n));
+            _aggregateS += System.currentTimeMillis() - _ss;
+            _avlS += key.length;
+            nS++;
+
+            if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+              throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+            }
+          }
+
+          for (int ii = 0; ii < 1000; ii++) {
+            key = new byte[((int)buffer.get()) & 0x0000001F];
+            buffer.get(key);
+            keyBytes = new Bytes(key);
+            if (!namesMap.containsKey(keyBytes)) {
+              n++;
+              namesMap.put(keyBytes, n);
+              long _sss = System.currentTimeMillis();
+              namesTrie.insert(DataEntry.getEntry(key, n));
+              // FIX: this timing was accumulated into _aggregateS, leaving
+              // _aggregateSS permanently 0 and inflating the short-key stat.
+              _aggregateSS += System.currentTimeMillis() - _sss;
+              _avlSS += key.length;
+              nSS++;
+
+              if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+                throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+              }
+            }
+          }
+        }
+      }
+    }
+    long _endInsert = System.currentTimeMillis();
+    System.out.println("Input " + namesMap.size() + " entries");
+    printStats("very long", nLL, _avlLL, _aggregateLL);
+    printStats("long", nL, _avlL, _aggregateL);
+    // FIX: the original swapped these two labels, reporting the very-short
+    // counters as "short" and vice versa.
+    printStats("short", nS, _avlS, _aggregateS);
+    printStats("very short", nSS, _avlSS, _aggregateSS);
+    chan.close();
+
+    long _startLookup = System.currentTimeMillis();
+    System.out.println("Checking random bytes from " + file);
+    checkAll(namesTrie, namesMap);
+    long _endLookup = System.currentTimeMillis();
+    
+    System.out.println("Test Succeeded with " + file +
+        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+  }
+
+  /**
+   * Prints insertion statistics for one key-size class.
+   * FIX: the original divided by the entry count (and total byte count)
+   * unconditionally, throwing ArithmeticException when a class was empty.
+   */
+  private static void printStats(String label, int count, long totalBytes, long totalMs) {
+    if (count == 0) {
+      System.out.println("  0 " + label + " entries");
+      return;
+    }
+    System.out.printf("  %d %s entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+        count, label, (totalBytes / count), totalMs, (totalMs * 1000 / count),
+        (totalBytes == 0 ? 0 : totalMs * 1000000 / totalBytes));
+  }
+
+  /** Verifies that every key in map looks up to its recorded value. */
+  private static void checkAll(MemoryTrie trie, Map<Bytes, Long> map) {
+    // FIX: the index was originally declared inside the loop, so the
+    // "entry:" number in the error message was always 0.
+    int i = 0;
+    for (Bytes k : map.keySet()) {
+      long found = trie.lookup(DataEntry.getEntry(k.bytes, 0));
+      if (found != map.get(k)) {
+        throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+            Arrays.toString(k.bytes) + " value: " + found +
+            " value': " + map.get(k));
+      }
+      i++;
+    }
+  }
+}

Modified: projects/xa2/object-pool/src/trie/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/trie/OnesTable.java	2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/OnesTable.java	2008-05-12 06:45:18 UTC (rev 914)
@@ -46,7 +46,7 @@
 
   static {
     try {
-      FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
+      FileChannel fc = new FileInputStream("trie/OnesTable.dat").getChannel();
       shortOnesTable = new byte[64*1024];
       fc.read(ByteBuffer.wrap(shortOnesTable));
       onesTable = new byte[64*1024 * 2*256];




More information about the Mulgara-svn mailing list