[Mulgara-svn] r844 - in projects/xa2/object-pool/src: . gen

andrae at mulgara.org andrae at mulgara.org
Thu Apr 24 08:48:53 UTC 2008


Author: andrae
Date: 2008-04-24 01:48:52 -0700 (Thu, 24 Apr 2008)
New Revision: 844

Modified:
   projects/xa2/object-pool/src/CompBlockTrie.java
   projects/xa2/object-pool/src/CompBlockTrieTest.java
   projects/xa2/object-pool/src/CompMemTrie.java
   projects/xa2/object-pool/src/gen/OnesTable.java
Log:
Round-tripping now works most of the time - I have a block alignment bug I still need to fix - I expect with
that bug fixed I should be in a position to move on to the ISAM-tree component.



Modified: projects/xa2/object-pool/src/CompBlockTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrie.java	2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompBlockTrie.java	2008-04-24 08:48:52 UTC (rev 844)
@@ -7,14 +7,12 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
-import java.util.Arrays;
 
 /**
  * Extends CompMemTrie to guarantee the trie will fit within a single block.
  */
 public class CompBlockTrie extends CompMemTrie {
   @SuppressWarnings("serial")
-  public static int MAGIC = 0x00010001;
   // Calculated as the worst case size per entry.
   // Assumes a full leaf and branch per entry.
   // Block: 2-byte type-header
@@ -27,7 +25,7 @@
   //       26-bytes Total.
   // Best case of course is 10-bytes leaf + 2-bytes children in branch.
   private static short WORST_CASE_ENTRY_SIZE = 26;
-  private static short BEST_CASE_ENTRY_SIZE = 12;
+//private static short BEST_CASE_ENTRY_SIZE = 12;
 
   /** Worst-case space left in trie measured in entries */
   private short space;

Modified: projects/xa2/object-pool/src/CompBlockTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrieTest.java	2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompBlockTrieTest.java	2008-04-24 08:48:52 UTC (rev 844)
@@ -11,6 +11,7 @@
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
@@ -21,10 +22,20 @@
  */
 public class CompBlockTrieTest {
   public static void main(String[] args) throws Exception {
-    testWithFile(new File("../scratch/propernames.uniq"));
-//    testWithFile(new File("../scratch/connectives.uniq"));
-//    testWithFile(new File("../scratch/web2a.uniq"));
-//    testWithFile(new File("../scratch/web2.uniq"));
+//    testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
+    testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
+    testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
+    testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
+    testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
 //    testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
 //    testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
 //    testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
@@ -33,43 +44,45 @@
   }
 
   private static final int BLOCK_SIZE = 32 * 1024;
-  public static void testWithFile(File file) throws Exception {
+  public static void testWithLimitedFile(File file, int limit) throws Exception {
     Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
 
-    FileChannel indexFile = new RandomAccessFile("tmp/sp_idx", "rw").getChannel().truncate(0);
-    FileChannel dataFile = new RandomAccessFile("tmp/sp_dat", "rw").getChannel().truncate(0);
+    FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+    FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
     ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
   
     ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
     CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
-    tries.add(trie);
 
-    System.out.println("Inserting lines from " + file);
+    System.out.println("Inserting " + limit + " lines from " + file);
 
     BufferedReader names = new BufferedReader(new FileReader(file));
-    long n = 0;
+    long n = 0xDEADBEEF00000000L;
     String name = names.readLine();
     long _startInsert = System.currentTimeMillis();
-    for (;;) {
+    for (int i = 0; i < limit; i++) {
       // The use of the double loop means we pay the cost of setting up the exception handler only once per
       // block, not once per entry.
       try {
-        while (name != null) {
-          trie.insert(name.getBytes(), n);
-          // Note: exception thrown here if trie is full.  So name will remain valid when we reenter loop with
-          // a new trie.
-          namesMap.put(name.getBytes(), n);
-          name = names.readLine();
-          n++;
+        if (name == null) {
+          break;
         }
-        break;  // From for(;;)-loop.
+        trie.insert(name.getBytes(), n);
+        // Note: exception thrown here if trie is full.  So name will remain valid when we reenter loop with
+        // a new trie.
+        namesMap.put(name.getBytes(), n);
+        name = names.readLine();
+        n++;
       } catch (CompMemTrie.InsufficientSpace ec) {
         trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
-        indexFile.write(indexBuffer);
+        indexFile.write((ByteBuffer)indexBuffer.flip());
+        tries.add(trie);
         trie = new CompBlockTrie(BLOCK_SIZE);
-        tries.add(trie);
       }
     }
+    trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+    indexFile.write((ByteBuffer)indexBuffer.flip());
+    tries.add(trie);
 
     indexFile.force(true);
     dataFile.force(true);
@@ -79,8 +92,8 @@
     long _endInsert = System.currentTimeMillis();
     names.close();
 
-    indexFile = new RandomAccessFile("tmp/sp_idx", "rw").getChannel().truncate(0);
-    dataFile = new RandomAccessFile("tmp/sp_dat", "rw").getChannel().truncate(0);
+    indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+    dataFile = new FileInputStream("tmp/sp_dat").getChannel();
     indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
     ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
 
@@ -92,24 +105,32 @@
       readTries.add(ctrie);
     }
 
+    long _endRead = System.currentTimeMillis();
+    
     System.out.println("Checking lines from " + file);
     long _startLookup = System.currentTimeMillis();
 
     for (byte[] key : namesMap.keySet()) {
       boolean found = false;
       for (CompBlockTrie t : tries) {
-        if (t.lookup(key) == namesMap.get(key)) {
-          found = true;
-        }
+        try {
+          if (t.lookup(key) == namesMap.get(key)) {
+            found = true;
+            break;
+          }
+        } catch (CompMemTrie.NotFound nf) {}
       }
       if (!found) {
         throw new IllegalStateException("Trie doesn't match Map");
       }
       found = false;
       for (CompBlockTrie t : readTries) {
-        if (t.lookup(key) == namesMap.get(key)) {
-          found = true;
-        }
+        try {
+          if (t.lookup(key) == namesMap.get(key)) {
+            found = true;
+            break;
+          }
+        } catch (CompMemTrie.NotFound nf) {}
       }
       if (!found) {
         throw new IllegalStateException("Read-Trie doesn't match Map");
@@ -119,7 +140,9 @@
     long _endLookup = System.currentTimeMillis();
     
     System.out.println("Test Succeeded with " + file +
-        " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+        " insert:" + (_endInsert - _startInsert) + 
+        " read:" + (_endRead - _startRead) + 
+        " lookup:" + (_endLookup - _startLookup));
   }
 
   public static class Bytes {

Modified: projects/xa2/object-pool/src/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrie.java	2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompMemTrie.java	2008-04-24 08:48:52 UTC (rev 844)
@@ -18,12 +18,14 @@
 public class CompMemTrie {
   @SuppressWarnings("serial")
   public static class NotFound extends Exception {};
+  @SuppressWarnings("serial")
   public static class InsufficientSpace extends Exception
       { public InsufficientSpace(String s) { super(s); } };
 
   static final byte TYPE_BRANCH_TERM = 0x01;   // Indicates a trie branch with a termination entry..
   static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
   static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
+  static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
   public final static int MAGIC = 0x00020001;
   public final static int INVERT_MAGIC = 0x01000200;
 
@@ -57,7 +59,7 @@
     if (root == null) {
       throw new IllegalStateException("Attempt to write empty trie");
     }
-    int indexreq = root.totalIndexSize() + 4 + 2;  // + sizeof(MAGIC) + sizeof(ptr_to_root)
+    int indexreq = root.totalIndexSize() + 4 + 4;  // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
     if (indexreq > index.remaining()) {
       throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
     }
@@ -67,7 +69,9 @@
 
     index.putInt(MAGIC);
     root.write(index, data);
-    index.putShort(index.limit() - 2, root.location);
+    index.put(TYPE_ROOT_LOC);
+    index.put((byte)0xFF);
+    index.putShort(root.location);
   }
 
   public CompMemTrie(ByteBuffer index, FileChannel data) throws IOException {
@@ -81,8 +85,6 @@
       }
     }
 
-    short rootLocation = index.getShort(index.limit() - 2);
-
     // I should be able to replace this with a stack.
     // The child->parent relationship is implicit in the ordering of the nodes in the file.
     // The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
@@ -90,7 +92,9 @@
     // root node.
     // For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
     HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
-    while (index.position() < rootLocation) {
+
+    short rootLocation = -1;
+    while (rootLocation == -1) {
       short location = (short)index.position();
       byte type = index.get();
       switch (type) {
@@ -103,6 +107,12 @@
         case TYPE_LEAF:
           nodeMap.put(location, new TrieLeaf(index, data));
           break;
+        case TYPE_ROOT_LOC:
+          index.get();
+          rootLocation = index.getShort();
+          break;
+        default:
+          throw new IllegalArgumentException("Invalid trie-node");
       }
     }
 
@@ -211,6 +221,17 @@
         term = (TrieLeaf)nodeMap.get(index.getShort());
       }
       children = new ByteMap<TrieNode>(index, nodeMap);
+      if (term == null) {
+        // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
+        // generics which triggers the obvious ClassCastException.  By extracting explicitly to an Object[]
+        // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
+        // insert the erroneous cast.
+        Object[] d = children.data;
+        aLeaf = ((TrieNode)(d[0])).aLeaf;
+      } else {
+        aLeaf = term;
+      }
+//      aLeaf = (term == null) ? ((TrieNode)(children.data[0])).aLeaf : term;
     }
 
     protected void write(ByteBuffer index, FileChannel data) throws IOException {
@@ -224,7 +245,7 @@
       location = (short)index.position();
       
       index.put((term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
-      index.put((byte)0x00);  // Padding to keep things short-aligned.
+      index.put((byte)0xFF);  // Padding to keep things short-aligned.
       index.putInt(offset);
       if (term != null) {
         index.putShort(term.location);
@@ -360,12 +381,14 @@
       index.get();
       this.keyLocation = index.getLong();
       data.position(keyLocation);
-      ByteBuffer bb = ByteBuffer.allocateDirect(14);
+      ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
       data.read(bb);
+      bb.flip();
       value = bb.getLong();
       int length = bb.getInt();
       this.key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
       data.read(ByteBuffer.wrap(key));
+      this.aLeaf = this;
     }
 
     protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
@@ -399,11 +422,11 @@
       dataBuf.putLong(value);
       dataBuf.putInt(key.length);
       dataBuf.put(key);
-      data.write(dataBuf);
+      data.write((ByteBuffer)dataBuf.flip());
 
       location = (short)index.position();
       index.put(TYPE_LEAF);
-      index.put((byte)0x00); // Pad to 16-bit aligned.
+      index.put((byte)0xFF); // Pad to 16-bit aligned.
       index.putLong(keyLocation);
     }
 

Modified: projects/xa2/object-pool/src/gen/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTable.java	2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/gen/OnesTable.java	2008-04-24 08:48:52 UTC (rev 844)
@@ -5,7 +5,6 @@
  */
 package gen;
 
-import java.io.File;
 import java.io.FileInputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;




More information about the Mulgara-svn mailing list