[Mulgara-svn] r769 - projects/xa2/object-pool/src
andrae at mulgara.org
andrae at mulgara.org
Fri Apr 11 02:58:31 UTC 2008
Author: andrae
Date: 2008-04-10 19:58:30 -0700 (Thu, 10 Apr 2008)
New Revision: 769
Added:
projects/xa2/object-pool/src/MemoryTrie.java
Log:
Moved Trie code into src dir.
Copied: projects/xa2/object-pool/src/MemoryTrie.java (from rev 754, projects/xa2/object-pool/scratch/TrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/MemoryTrie.java (rev 0)
+++ projects/xa2/object-pool/src/MemoryTrie.java 2008-04-11 02:58:30 UTC (rev 769)
@@ -0,0 +1,267 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+
+/**
+ * Simple test class to help crystalise my thinking on tries.
+ */
+public class TrieTest {
+ @SuppressWarnings("serial")
+ public static class NotFound extends Exception {};
+
+ public static class Trie {
+ TrieBranch root;
+
+ public Trie() {
+ this.root = null;
+ }
+
+ public void insert(String key, long value) {
+ if (root == null) {
+ root = new TrieBranch(key, value);
+ } else {
+ try {
+ root.insert(key, value);
+ } catch (TrieNode.InsertAbove k) {
+ root = new TrieBranch(root, key, value);
+ }
+ }
+ }
+
+ public long lookup(String key) throws NotFound {
+ if (root == null) {
+ throw new NotFound();
+ }
+
+ return root.lookup(key);
+ }
+ }
+
+ public abstract static class TrieNode {
+ @SuppressWarnings("serial")
+ public static class InsertAbove extends Exception {} ;
+ protected static final InsertAbove cont = new InsertAbove();
+
+ protected TrieLeaf least;
+
+ protected abstract void insert(TrieLeaf node, int parentLcd) throws InsertAbove;
+ protected abstract long lookup(byte[] key, int parentLcd) throws NotFound;
+
+ public static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+ if (lhsOff < 0 || rhsOff < 0) {
+ return false;
+ }
+ if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
+ }
+
+ return true;
+ }
+ }
+
+ public static class TrieBranch extends TrieNode {
+ int offset;
+ Map<Byte, TrieNode> children;
+ TrieLeaf term;
+
+ public TrieBranch(String str, long value) {
+ super();
+ this.children = new HashMap<Byte, TrieNode>();
+ this.term = new TrieLeaf(str, value);
+ this.offset = term.key.length;
+ this.least = this.term;
+ }
+
+ public TrieBranch(TrieBranch oldRoot, String key, long value) {
+ this(oldRoot, new TrieLeaf(key, value));
+ }
+
+ protected TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
+ super();
+ assert oldNode != null;
+ assert newNode != null;
+
+ offset = 0;
+ children = new HashMap<Byte, TrieNode>();
+ term = null;
+ least = null;
+
+ byte[] lhs = oldNode.least.key;
+ byte[] rhs = newNode.key;
+
+ int i = 0;
+ while (i < lhs.length && i < rhs.length) {
+ if (lhs[i] != rhs[i]) {
+ offset = i;
+ children.put(lhs[i], oldNode);
+ children.put(rhs[i], newNode);
+
+ least = lhs[i] < rhs[i] ? oldNode.least : newNode;
+
+ return; // Escape here.
+ }
+ i++;
+ }
+
+ if (i < lhs.length) {
+ offset = i;
+ children.put(lhs[i], oldNode);
+ term = newNode;
+ least = newNode;
+ } else if (i < rhs.length) {
+ if (oldNode instanceof TrieLeaf) {
+ offset = i;
+ children.put(rhs[i], newNode);
+ term = (TrieLeaf)oldNode;
+ least = (TrieLeaf)oldNode;
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
+ }
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with equal children");
+ }
+ }
+
+ public void insert(String str, long value) throws InsertAbove {
+ insert(new TrieLeaf(str, value), 0);
+ }
+
+ public long lookup(String str) throws NotFound {
+ return lookup(str.getBytes(), 0);
+ }
+
+ protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+ if (!regionMatches(least.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+ throw cont;
+ } else {
+ // new node matches the lcp of this node.
+ if (node.key.length == offset) {
+ // new node is expected to terminate here.
+ if (term == null) {
+ term = node;
+ least = node;
+ } else {
+ term.insert(node, offset);
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ TrieNode child = children.get(node.key[offset]);
+ if (child == null) {
+ // this is the first node to be inserted on this branching key.
+ children.put(node.key[offset], node);
+ } else {
+ try {
+ // there is an existing child node branching on this branching key.
+ child.insert(node, offset);
+ } catch (InsertAbove k) {
+ // as every child node shares a common lcp, any child will suffice when preparing the new
+ // lcp/offset of the new node. The least node is only required as the new parent's least node
+ // will be the smallest of the inserted node and this node's least node.
+ children.put(node.key[offset], new TrieBranch(child, node));
+ }
+ }
+ }
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd) throws NotFound {
+ if (!regionMatches(least.key, parentLcd, key, parentLcd, offset - parentLcd)) {
+ throw new NotFound();
+ } else {
+ // new node matches the lcp of this node.
+ TrieNode child;
+ if (key.length == offset) {
+ // new node is expected to terminate here.
+ if (term != null) {
+ return term.value;
+ } else {
+ throw new NotFound();
+ }
+ } else {
+ // new node is expected to terminate in one of this nodes children.
+ child = children.get(key[offset]);
+ if (child != null) {
+ return child.lookup(key, offset);
+ } else {
+ throw new NotFound();
+ }
+ }
+ }
+ }
+ }
+
+ public static class TrieLeaf extends TrieNode {
+ final byte[] key;
+ final long value;
+
+ TrieLeaf(String key, long value) {
+ super();
+ this.key = key.getBytes();
+ this.value = value;
+ this.least = this;
+ }
+
+ protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
+ if (key.length != node.key.length) {
+ throw cont;
+ } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+ throw cont;
+ } else if (value == node.value) {
+ return; // Duplicate key/value pair.
+ } else {
+ throw new IllegalArgumentException("Attempt to insert multiple values for same key");
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcd) {
+ assert Arrays.equals(this.key, key);
+ return value;
+ }
+ }
+
+
+ public static void main(String[] args) throws Exception {
+ testWithFile(new File("./propernames.uniq"));
+ testWithFile(new File("./connectives.uniq"));
+ testWithFile(new File("./web2a.uniq"));
+ testWithFile(new File("./web2.uniq"));
+ }
+
+ public static void testWithFile(File file) throws Exception {
+ Map<String, Long> namesMap = new HashMap<String, Long>();
+ Trie namesTrie = new Trie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ while (name != null) {
+ namesMap.put(name, n);
+ namesTrie.insert(name, n);
+ name = names.readLine();
+ n++;
+ }
+ names.close();
+
+ System.out.println("Checking lines from " + file);
+ for (String key : namesMap.keySet()) {
+ if (namesTrie.lookup(key) != namesMap.get(key)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+
+ System.out.println("Test Succeeded with " + file);
+ }
+}
More information about the Mulgara-svn
mailing list