[Mulgara-svn] r769 - projects/xa2/object-pool/src

andrae at mulgara.org andrae at mulgara.org
Fri Apr 11 02:58:31 UTC 2008


Author: andrae
Date: 2008-04-10 19:58:30 -0700 (Thu, 10 Apr 2008)
New Revision: 769

Added:
   projects/xa2/object-pool/src/MemoryTrie.java
Log:
Moved Trie code into src dir.



Copied: projects/xa2/object-pool/src/MemoryTrie.java (from rev 754, projects/xa2/object-pool/scratch/TrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/MemoryTrie.java	                        (rev 0)
+++ projects/xa2/object-pool/src/MemoryTrie.java	2008-04-11 02:58:30 UTC (rev 769)
@@ -0,0 +1,267 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+/**
+ * Simple test class to help crystalise my thinking on tries.
+ */
+public class TrieTest {
+  @SuppressWarnings("serial")
+  public static class NotFound extends Exception {};
+
+  public static class Trie {
+    TrieBranch root;
+
+    public Trie() {
+      this.root = null;
+    }
+
+    public void insert(String key, long value) {
+      if (root == null) {
+        root = new TrieBranch(key, value);
+      } else {
+        try {
+          root.insert(key, value);
+        } catch (TrieNode.InsertAbove k) {
+          root = new TrieBranch(root, key, value);
+        }
+      }
+    }
+
+    public long lookup(String key) throws NotFound {
+      if (root == null) {
+        throw new NotFound();
+      }
+
+      return root.lookup(key);
+    }
+  }
+
+  public abstract static class TrieNode {
+    @SuppressWarnings("serial")
+	public static class InsertAbove extends Exception {} ;
+    protected static final InsertAbove cont = new InsertAbove();
+
+    protected TrieLeaf least;
+
+    protected abstract void insert(TrieLeaf node, int parentLcd) throws InsertAbove;
+    protected abstract long lookup(byte[] key, int parentLcd) throws NotFound;
+
+    public static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+      if (lhsOff < 0 || rhsOff < 0) {
+        return false;
+      }
+      if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+        return false;
+      }
+      for (int i = 0; i < len; i++) {
+        if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
+      }
+
+      return true;
+    }
+  }
+
+  public static class TrieBranch extends TrieNode {
+    int offset;
+    Map<Byte, TrieNode> children;
+    TrieLeaf term;
+
+    public TrieBranch(String str, long value) {
+      super();
+      this.children = new HashMap<Byte, TrieNode>();
+      this.term = new TrieLeaf(str, value);
+      this.offset = term.key.length;
+      this.least = this.term;
+    }
+
+    public TrieBranch(TrieBranch oldRoot, String key, long value) {
+      this(oldRoot, new TrieLeaf(key, value));
+    }
+
+    protected TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
+      super();
+      assert oldNode != null;
+      assert newNode != null;
+
+      offset = 0;
+      children = new HashMap<Byte, TrieNode>();
+      term = null;
+      least = null;
+
+      byte[] lhs = oldNode.least.key;
+      byte[] rhs = newNode.key;
+
+      int i = 0;
+      while (i < lhs.length && i < rhs.length) {
+        if (lhs[i] != rhs[i]) {
+          offset = i;
+          children.put(lhs[i], oldNode);
+          children.put(rhs[i], newNode);
+
+          least = lhs[i] < rhs[i] ? oldNode.least : newNode;
+
+          return; // Escape here.
+        }
+        i++;
+      }
+
+      if (i < lhs.length) {
+        offset = i;
+        children.put(lhs[i], oldNode);
+        term = newNode;
+        least = newNode;
+      } else if (i < rhs.length) {
+        if (oldNode instanceof TrieLeaf) {
+          offset = i;
+          children.put(rhs[i], newNode);
+          term = (TrieLeaf)oldNode;
+          least = (TrieLeaf)oldNode;
+        } else {
+          throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
+        }
+      } else {
+        throw new IllegalStateException("Attempt to create new branch node with equal children");
+      }
+    }
+
+    public void insert(String str, long value) throws InsertAbove {
+      insert(new TrieLeaf(str, value), 0);
+    }
+
+    public long lookup(String str) throws NotFound {
+      return lookup(str.getBytes(), 0);
+    }
+
+    protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+      if (!regionMatches(least.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+        throw cont;
+      } else {
+        // new node matches the lcp of this node.
+        if (node.key.length == offset) {
+          // new node is expected to terminate here.
+          if (term == null) {
+            term = node;
+            least = node;
+          } else {
+            term.insert(node, offset);
+          }
+        } else {
+          // new node is expected to terminate in one of this nodes children.
+          TrieNode child = children.get(node.key[offset]);
+          if (child == null) {
+            // this is the first node to be inserted on this branching key.
+            children.put(node.key[offset], node);
+          } else {
+            try {
+              // there is an existing child node branching on this branching key.
+              child.insert(node, offset);
+            } catch (InsertAbove k) {
+                // as every child node shares a common lcp, any child will suffice when preparing the new
+                // lcp/offset of the new node.  The least node is only required as the new parent's least node
+                // will be the smallest of the inserted node and this node's least node.
+                children.put(node.key[offset], new TrieBranch(child, node));
+            }
+          }
+        }
+      }
+    }
+
+    protected long lookup(byte[] key, int parentLcd) throws NotFound {
+      if (!regionMatches(least.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+        throw new NotFound();
+      } else {
+        // new node matches the lcp of this node.
+        TrieNode child;
+        if (key.length == offset) {
+          // new node is expected to terminate here.
+          if (term != null) {
+            return term.value;
+          } else {
+            throw new NotFound();
+          }
+        } else {
+          // new node is expected to terminate in one of this nodes children.
+          child = children.get(key[offset]);
+          if (child != null) {
+            return child.lookup(key, offset);
+          } else {
+            throw new NotFound();
+          }
+        }
+      }
+    }
+  }
+
+  public static class TrieLeaf extends TrieNode {
+    final byte[] key;
+    final long value;
+
+    TrieLeaf(String key, long value) {
+      super();
+      this.key = key.getBytes();
+      this.value = value;
+      this.least = this;
+    }
+
+    protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+      if (key.length != node.key.length) {
+        throw cont;
+      } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+        throw cont;
+      } else if (value == node.value) {
+        return; // Duplicate key/value pair.
+      } else {
+        throw new IllegalArgumentException("Attempt to insert multiple values for same key");
+      }
+    }
+
+    protected long lookup(byte[] key, int parentLcd) {
+      assert Arrays.equals(this.key, key);
+      return value;
+    }
+  }
+
+
+  public static void main(String[] args) throws Exception {
+    testWithFile(new File("./propernames.uniq"));
+    testWithFile(new File("./connectives.uniq"));
+    testWithFile(new File("./web2a.uniq"));
+    testWithFile(new File("./web2.uniq"));
+  }
+
+  public static void testWithFile(File file) throws Exception {
+    Map<String, Long> namesMap = new HashMap<String, Long>();
+    Trie namesTrie = new Trie();
+
+    System.out.println("Inserting lines from " + file);
+    BufferedReader names = new BufferedReader(new FileReader(file));
+    long n = 0;
+    String name = names.readLine();
+    while (name != null) {
+      namesMap.put(name, n);
+      namesTrie.insert(name, n);
+      name = names.readLine();
+      n++;
+    }
+    names.close();
+
+    System.out.println("Checking lines from " + file);
+    for (String key : namesMap.keySet()) {
+      if (namesTrie.lookup(key) != namesMap.get(key)) {
+        throw new IllegalStateException("Trie doesn't match Map");
+      }
+    }
+    
+    System.out.println("Test Succeeded with " + file);
+  }
+}




More information about the Mulgara-svn mailing list