[Mulgara-svn] r914 - in projects/xa2/object-pool/src: data scheduler trie
andrae at mulgara.org
Mon May 12 06:45:18 UTC 2008
Author: andrae
Date: 2008-05-11 23:45:18 -0700 (Sun, 11 May 2008)
New Revision: 914
Added:
projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak
projects/xa2/object-pool/src/scheduler/FileHandleCMap.java
projects/xa2/object-pool/src/trie/MemoryTrieTest.java
Modified:
projects/xa2/object-pool/src/data/DataEntry.java
projects/xa2/object-pool/src/data/DataFile.java
projects/xa2/object-pool/src/scheduler/FileHandle.java
projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
projects/xa2/object-pool/src/scheduler/IOScheduler.java
projects/xa2/object-pool/src/trie/ByteMap.java
projects/xa2/object-pool/src/trie/DiskTrie.java
projects/xa2/object-pool/src/trie/MemoryTrie.java
projects/xa2/object-pool/src/trie/OnesTable.java
Log:
Several changes.
1. Migrated into a multi-package structure.
2. Started a basic IOScheduler; this will eventually need to become a much more sophisticated scheduler.
3. Abstracted the data-file format into a separate class.
4. Migrated the Trie structures to use the new data-entry class (a usage sketch follows this log).
5. Wrote tests for MemoryTrie based on CompMemTrie.
6. Deprecated the Comp*Trie classes.
7. Added the FileHandle class. Its major focus will be tracking IO caching failures and logical seeks.
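
As a usage sketch of the migrated API (distilled from the new MemoryTrieTest
below; the key bytes and value are placeholders):

    import data.DataEntry;
    import trie.MemoryTrie;

    // Index a key/value pair in a MemoryTrie and read it back.
    public class TrieExample {
      public static void main(String[] args) throws Exception {
        MemoryTrie trie = new MemoryTrie();
        DataEntry entry = DataEntry.getEntry("example-key".getBytes(), 42L);
        trie.insert(entry);
        // Lookup takes a key-only entry; the value argument is ignored.
        long value = trie.lookup(DataEntry.getEntry("example-key".getBytes(), 0));
        System.out.println(value); // prints 42
      }
    }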
Modified: projects/xa2/object-pool/src/data/DataEntry.java
===================================================================
--- projects/xa2/object-pool/src/data/DataEntry.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/data/DataEntry.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -3,110 +3,189 @@
* Author Andrae Muys
* Date 6th May 2008
*/
+package data;
import java.io.IOException;
+import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
+//import scheduler.Block;
+//import scheduler.FileHandle;
+
/**
* Abstracts the concept of a data file entry.
*/
public abstract class DataEntry {
+ protected static final int HEADER_LENGTH = 12;
+
protected long value;
- public abstract int size();
+ public abstract int dataSize();
- public abstract int totalSize() {
- return 8 + 4 + size(); // VALUE + LENGTH + DATA
- }
-
public abstract void write(FileChannel chan) throws IOException;
public abstract DataEntry canonicalize();
- public abstract void rewind();
- public abstract byte get(int position) throws IOException;
- public abstract byte get() throws IOException;
+ public abstract DataEntry rewind();
+ public abstract ByteBuffer slice(int offset, int length);
+ public abstract byte get(int position) throws BufferUnderflowException;
+ public abstract byte get() throws BufferUnderflowException;
+ public int totalSize() {
+ return HEADER_LENGTH + dataSize(); // VALUE + LENGTH + DATA
+ }
+
public long getValue() {
return value;
}
+ public static DataEntry getEntry(byte[] data, long value) {
+ return new MemoryBackedDataEntry(value, data, false);
+ }
+/*
protected static class DiskBackedDataEntry extends DataEntry {
private FileHandle handle;
private long position;
- private ByteBuffer entry;
private int length;
// FIXME: Need to replace this with a background populated ConcurrentLinkedQueue.
private Block[] blocks;
- private Block currBlock;
+ private ByteBuffer currBuffer;
+ private int currBlock;
+ private int currPosition;
-
DiskBackedDataEntry(FileHandle handle, long position) {
this.handle = handle;
this.position = position;
- this.curr = 0;
- this.currBlock = handle.readBlock(position);
- this.entry = currBlock.offset(position & (BLOCK_SIZE - 1)); // block size is a power of 2 therefore mask is size - 1.
+ Block block = handle.readBlock(position);
+ this.currBlock = 0;
+    this.currBuffer = block.offset(position & (BLOCK_SIZE - 1)); // block size is a power of 2 therefore mask is size - 1.
+ this.currPosition = 0;
- this.value = entry.getLong();
- this.length = entry.getInt();
+ this.value = currBuffer.getLong();
+ this.length = currBuffer.getInt();
- if (length <= entry.remaining()) {
- entry.limit(length);
- blocks[] = new Block[] { block };
- } else {
- entry.limit(entry.capacity());
- int remaining = length - entry.remaining();
- int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
- blocks[] = new Block[remainingBlocks];
- blocks[0] = block;
+ currBuffer.mark();
+ // Set the buffer to the entire block for length calculations.
+ currBuffer.limit(currBuffer.capacity());
+ int remaining = length - currBuffer.remaining();
+ int totalBlocks = remaining / BLOCK_SIZE + (remaining % BLOCK_SIZE > 0 ? 1 : 0);
+ blocks = new Block[totalBlocks];
+ blocks[0] = block;
+ if (totalBlocks == 1) {
+ // [mark,length] establishes the data covered by this entry.
+ currBuffer.limit(length);
}
+ // FIXME: We eventually want to submit the remaining blocks to the scheduler for speculative background
+ // fetch.
}
+ /**
+ * Convert 1 block entries into memory-backed entries.
+ *
+ * This makes the block's memory recoverable should we require it, and as the object-pool is subject to a
+ * large number of stabbing-queries this is a good thing. Should we scan the pool, the block is likely to
+ * still be cached so we shouldn't need to reload it; the only cost of canonicalizing in this case is an
+ * unnecessary copy. However releasing a 32K-2M block in preference to a 20-30byte array is preferable if
+ * we are doing a stabbing query.
+ * Especially as a DiskBackedDataEntry requires 48-bytes anyway vs. 12 for a MemoryBackedDataEntry.
+ * /
public DataEntry canonicalize() {
if (blocks.length == 1) {
- byte[] data = new byte[entry.reset().remaining()];
- entry.get(data);
+ byte[] data = new byte[currBuffer.reset().remaining()];
+ currBuffer.get(data);
return new MemoryBackedDataEntry(value, data);
} else {
return this;
}
}
- public int size() {
+ public int dataSize() {
return length;
}
- public void rewrind() {
- curr = 0;
+ public DataEntry rewind() {
+ if (currBlock == 0) {
+ currBuffer.rewind();
+ } else {
+ currBlock = 0;
+ // block size is a power of 2 therefore mask is size - 1.
+ currBuffer = blocks[0].offset((position & (BLOCK_SIZE - 1)) + HEADER_LENGTH);
+      if (blocks.length == 1) {
+ // [mark,length] establishes the data covered by this entry.
+ // Note: This should never occur as we expect DataEntries to be canonicalized, which would result in
+ // a 1 block entry being replaced by a MemoryBackedDataEntry.
+ currBuffer.limit(length);
+ } else {
+ currBuffer.limit(currBuffer.capacity());
+ }
+ }
+
+ return this;
}
public byte get() throws IOException {
- byte b = get(curr);
- curr++;
- return b;
+ if (currBuffer.hasRemaining()) {
+ currPosition++;
+ return currBuffer.get();
+ } else {
+ return get(currPosition);
+ }
}
public byte get(int off) {
- int bN = (off + 12) / BLOCK_SIZE;
- if (blocks[bN] == null) {
- blocks[bN] = handle.readBlock(off + 12);
+    if (off < 0) {
+      throw new IndexOutOfBoundsException("Attempt to use -ve offset");
+    }
+    if (off >= length) {
+      throw new IndexOutOfBoundsException("Attempt to use offset >= length");
+    }
-
+ currBlock = (off + HEADER_LENGTH) / BLOCK_SIZE;
+ if (blocks[currBlock] == null) {
+      blocks[currBlock] = handle.readBlock(position + currBlock * BLOCK_SIZE);
+ }
+ currBuffer = blocks[currBlock].offset(position & (BLOCK_SIZE - 1)); // block size is a power of 2 therefore mask is size - 1.
+
+ // Allow for header.
+ if (currBlock == 0) {
+ currBuffer.position(HEADER_LENGTH);
+ }
+ // Allow for partial final block.
+ if (currBlock == blocks.length - 1) {
+      currBuffer.limit(length % BLOCK_SIZE + (position & (BLOCK_SIZE - 1)));
+ }
+
+ currPosition = off;
+
+ return get();
}
public void write(FileChannel chan) throws IOException {
- handle.transferTo(chan, position, 12+length);
+ handle.transferTo(chan, position, HEADER_LENGTH + length);
}
+
+ /**
+ * FIXME: We really need to return our own ByteBuffer implementation here that exploits lazy loading.
+ *
+ * Note this is a really really inefficient implementation.
+ * /
+ public ByteBuffer slice(int offset, int length) throws IOException {
+ ByteBuffer buffer = ByteBuffer.allocate(length);
+ buffer.put(get(offset));
+ for (int i = 1; i < length; i++) {
+ buffer.put(get());
+ }
+
+ return buffer;
+ }
}
-
+*/
protected static class MemoryBackedDataEntry extends DataEntry {
private ByteBuffer data;
@@ -125,7 +204,22 @@
}
}
- public int size() {
+ public byte get() throws BufferUnderflowException {
+ return data.get();
+ }
+
+  public byte get(int offset) throws BufferUnderflowException {
+    data.position(offset);
+    return data.get();
+  }
+
+ public DataEntry rewind() {
+ data.rewind();
+
+ return this;
+ }
+
+ public int dataSize() {
return data.capacity();
}
@@ -134,12 +228,39 @@
}
public void write(FileChannel chan) throws IOException {
- ByteBuffer bb = ByteBuffer.allocateDirect(12 + data.length);
+ ByteBuffer bb = ByteBuffer.allocate(HEADER_LENGTH);
bb.clear();
bb.putLong(value);
- bb.putInt(data.length);
- bb.put(data);
- chan.write((ByteBuffer)bb.flip());
+ bb.putInt(data.capacity());
+    chan.write(new ByteBuffer[] { (ByteBuffer)bb.flip(), (ByteBuffer)data.duplicate().clear() });
}
+
+ public ByteBuffer slice(int offset, int length) {
+ ByteBuffer bb = data.duplicate();
+ bb.position(offset);
+ bb = bb.slice();
+ bb.limit(length);
+
+ return bb;
+ }
+
+ public boolean equals(Object rhs) {
+ if (rhs == this) {
+ return true;
+ }
+    if (rhs != null && rhs.getClass().equals(getClass())) {
+ MemoryBackedDataEntry mbde = (MemoryBackedDataEntry)rhs;
+ return value == mbde.value && data.duplicate().clear().equals(mbde.data.duplicate().clear());
+ } else {
+ // FIXME: Do I need to consider comparing with DiskBackedDataEntry here?
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ // Note: If I do consider DBDE's equal to MBDE keeping it hashcode compatible will be all but
+ // impossible. I doubt I want to do the linear hashCode computation required for a DBDE.
+ return Long.valueOf(value).hashCode() ^ data.duplicate().clear().hashCode();
+ }
}
}
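
The entry serialization fixed above is a 12-byte header (an 8-byte VALUE plus
a 4-byte LENGTH) followed by LENGTH bytes of data. Restated in isolation as a
hypothetical helper (writeEntry is not part of the commit):

    // Write VALUE + LENGTH + DATA with a single gathering write.
    static void writeEntry(FileChannel chan, long value, ByteBuffer data) throws IOException {
      ByteBuffer header = ByteBuffer.allocate(12);    // HEADER_LENGTH
      header.putLong(value);                          // VALUE
      header.putInt(data.capacity());                 // LENGTH
      header.flip();
      // An independent, rewound view of the data follows the header.
      chan.write(new ByteBuffer[] { header, (ByteBuffer)data.duplicate().clear() });
    }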
Modified: projects/xa2/object-pool/src/data/DataFile.java
===================================================================
--- projects/xa2/object-pool/src/data/DataFile.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/data/DataFile.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -3,6 +3,7 @@
* Author Andrae Muys
* Date 1st May 2008
*/
+package data;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -21,11 +22,11 @@
private FileHandle handle;
- public DataFile(File file, IOScheduler scheduler) {
- this.handle = scheduler.open(file, MAGIC, LOG2_BLOCK_SIZE);
+ public DataFile(String filename, IOScheduler scheduler) {
+ this.handle = scheduler.open(filename, MAGIC, LOG2_BLOCK_SIZE);
}
public DataEntry getEntry(long position) throws IOException {
- return new DataEntry(this, position);
+ return new DiskBackedDataEntry(handle, position).canonicalize();
}
}
Copied: projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak (from rev 891, projects/xa2/object-pool/src/scheduler/IOScheduler.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak (rev 0)
+++ projects/xa2/object-pool/src/scheduler/CachingScheduler.java.bak 2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,35 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.lang.ref.SoftReference;
+import java.util.HashMap;
+import java.util.Map;
+
+public class IOScheduler {
+ private Map<FileHandle, FileCache> fileCache;
+ private File[] disks;
+ private FileHandleCMap fileMap;
+
+ public IOScheduler(File[] disks) {
+ this.disks = disks;
+ fileCache = new HashMap<FileHandle, FileCache>();
+ }
+
+ public FileHandle newFile(File file) {
+ if (file.exists()) {
+ throw new IllegalArgumentException("File exists: " + file);
+ }
+ fileMap.put(file, new FileHandle(file));
+ }
+
+ public FileHandle openFile(File file) {
+ if (!file.exists()) {
+ throw new IllegalArgumentException("File does not exist: " + file);
+ }
+ fileMap.put(file, new FileHandle(file));
+ }
+}
Modified: projects/xa2/object-pool/src/scheduler/FileHandle.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandle.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/FileHandle.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -6,34 +6,71 @@
package scheduler;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
public class FileHandle {
private File file;
private FileChannel channel;
+ private int blocksize;
private long seeks;
- FileHandle(File file) {
+ FileHandle(File file, FileChannel channel, int magic, int blocksize) {
this.file = file;
+ this.channel = channel;
+ this.blocksize = blocksize;
this.seeks = 0;
+ }
- this.channel = file.exists() ?
- new FileInputStream(file).getChannel() :
- new FileOutputStream(file).getChannel();
+  FileHandle(File file, FileOutputStream stream, int magic, int blocksize) {
+ this(file, stream.getChannel(), magic, blocksize);
+
+ ByteBuffer bb = ByteBuffer.allocate(blocksize);
+ bb.putInt(magic);
+ bb.putInt(blocksize);
+ bb.position(0);
+ bb.limit(bb.capacity());
+
+ channel.write(bb);
}
+  FileHandle(File file, FileInputStream stream, int magic, int blocksize) {
+ this(file, stream.getChannel(), magic, blocksize);
+
+ ByteBuffer bb = ByteBuffer.allocate(blocksize);
+ this.channel.read(bb);
+
+ bb.clear();
+ int filemagic = bb.getInt();
+ if (filemagic != magic) {
+ bb.order(bb.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+ bb.clear();
+ filemagic = bb.getInt();
+ if (filemagic == magic) {
+ throw new IllegalArgumentException("Attempt to read file using incorrect endian format");
+ } else {
+ throw new IllegalArgumentException("Bad magic in index buffer: " + filemagic + ", MAGIC=" + magic);
+ }
+ }
+
+ int filebs = bb.getInt();
+ if (filebs != blocksize) {
+ throw new IllegalArgumentException("Attempt to read file(" + file + ") using incorrect blocksize: " + blocksize);
+ }
+ }
+
public File getFile() {
return file;
}
- Block readBlock(Long blockId, Block block) {
+ Block readBlock(Long blockId) {
long position = blockId.longValue();
if (channel.position() != position) {
seeks++;
channel.position(position);
}
- channel.read(block.prepare(blockId));
+    Block block = new Block();
+    channel.read(block.prepare(blockId));
return block;
}
@@ -42,5 +79,7 @@
Long position = new Long(channel.position());
channel.write(block.prepare(blockId));
+
+ return position;
}
}
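
The header-validating constructor above retries a failed magic check in the
opposite byte order, so a wrong-endian file is reported distinctly from a
corrupt one. As a standalone sketch (checkMagic is a hypothetical helper, not
part of this commit):

    // Distinguish a wrong-endian file from a corrupt header block.
    static void checkMagic(ByteBuffer bb, int magic) {
      if (bb.getInt() == magic) return;
      bb.order(bb.order() == ByteOrder.BIG_ENDIAN ? ByteOrder.LITTLE_ENDIAN
                                                  : ByteOrder.BIG_ENDIAN);
      bb.clear();
      if (bb.getInt() == magic) {
        throw new IllegalArgumentException("Attempt to read file using incorrect endian format");
      }
      throw new IllegalArgumentException("Bad magic in header block");
    }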
Copied: projects/xa2/object-pool/src/scheduler/FileHandleCMap.java (from rev 891, projects/xa2/object-pool/src/scheduler/FileHandleFactory.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleCMap.java (rev 0)
+++ projects/xa2/object-pool/src/scheduler/FileHandleCMap.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,30 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A canonical mapping of File objects to FileHandles.
+ */
+public class FileHandleCMap {
+ private Map<File, FileHandle> handles;
+
+ public FileHandleCMap() {
+ this.handles = new HashMap<File, FileHandle>();
+ }
+
+ public FileHandle getHandle(File file) {
+ FileHandle result = handles.get(file);
+ if (result == null) {
+ result = new FileHandle(file, this);
+      handles.put(file, result);
+ }
+ return result;
+ }
+}
Modified: projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleFactory.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/FileHandleFactory.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -6,25 +6,34 @@
package scheduler;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
-import java.util.HashMap;
-import java.util.Map;
/**
* A canonical mapping of File objects to FileHandles.
*/
-public class FileHandleCMap {
- private Map<File, FileHandle> handles;
+public class FileHandleFactory {
+ public FileHandleFactory() {}
- public FileHandleCMap() {
- this.handles = new HashMap<File, FileHandle>();
+ public FileHandle createNew(File file, int magic, int blocksize) {
+ if (file.exists()) {
+ throw new IllegalArgumentException("File exists: " + file);
+ }
+
+    return new FileHandle(file, new FileOutputStream(file), magic, blocksize);
}
- public FileHandle getHandle(File file) {
- FileHandle result = handles.get(file);
- if (result == null) {
- result = new FileHandle(file, this);
- handles.put(result);
+ public FileHandle openExisting(File file, int magic, int blocksize) {
+ if (!file.exists()) {
+ throw new IllegalArgumentException("File does not exist: " + file);
}
- return result;
+
+    return new FileHandle(file, new FileInputStream(file), magic, blocksize);
}
+
+ public FileHandle open(File file, int magic, int blocksize) {
+ if (file.exists()) {
+ return openExisting(file, magic, blocksize);
+ } else {
+      return createNew(file, magic, blocksize);
+ }
+ }
}
Modified: projects/xa2/object-pool/src/scheduler/IOScheduler.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/IOScheduler.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/scheduler/IOScheduler.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -5,31 +5,28 @@
*/
package scheduler;
-import java.lang.ref.SoftReference;
-import java.util.HashMap;
-import java.util.Map;
+import java.io.File;
public class IOScheduler {
- private Map<FileHandle, FileCache> fileCache;
- private File[] disks;
- private FileHandleCMap fileMap;
+ private FileHandleFactory fileHandleFactory;
+ private File dir;
- public IOScheduler(File[] disks) {
- this.disks = disks;
- fileCache = new HashMap<FileHandle, FileCache>();
- }
-
- public FileHandle newFile(File file) {
- if (file.exists()) {
- throw new IllegalArgumentException("File exists: " + file);
+ public IOScheduler(File dir, FileHandleFactory fileHandleFactory) {
+ this.fileHandleFactory = fileHandleFactory;
+ this.dir = dir;
+    if (dir.exists()) {
+      if (!dir.isDirectory()) {
+        throw new IllegalArgumentException("Scheduler argument not directory: " + dir);
+      }
-    fileMap.put(file, new FileHandle(file));
+    } else if (!dir.mkdirs()) {
+      throw new IllegalStateException("Failed to make directory: " + dir);
+    }
}
- public FileHandle openFile(File file) {
- if (!file.exists()) {
- throw new IllegalArgumentException("File does not exist: " + file);
- }
- fileMap.put(file, new FileHandle(file));
+ public FileHandle open(String name, int magic, int blocksize) {
+ File file = new File(dir, name);
+
+    return fileHandleFactory.open(file, magic, blocksize);
}
}
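
Taken together with FileHandleFactory, the intended call path appears to be
the following ("pool-dir" is a placeholder, and MAGIC/LOG2_BLOCK_SIZE mirror
the constants DataFile passes above):

    IOScheduler scheduler = new IOScheduler(new File("pool-dir"), new FileHandleFactory());
    // Resolves "data" against pool-dir, creating or opening it as appropriate.
    FileHandle handle = scheduler.open("data", MAGIC, LOG2_BLOCK_SIZE);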
Modified: projects/xa2/object-pool/src/trie/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMap.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/ByteMap.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -3,6 +3,7 @@
* Author Andrae Muys
* Date 8th April 2008
*/
+package trie;
import static trie.OnesTable.*;
Modified: projects/xa2/object-pool/src/trie/DiskTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrie.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/DiskTrie.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -262,7 +262,7 @@
index.get(); // skip padding.
long keyLocation = index.getLong();
- DataEntry entry = dataFile.getEntry(keyLocation).canonicalize();
+ DataEntry entry = dataFile.getEntry(keyLocation);
return new TrieLeaf(entry, false);
}
Modified: projects/xa2/object-pool/src/trie/MemoryTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrie.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/MemoryTrie.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -3,19 +3,20 @@
* Author Andrae Muys
* Date 15th April 2008
*/
+package trie;
-import java.util.Arrays;
+import data.DataEntry;
/**
* Implements an in-memory trie - uses ByteMaps to implement trie nodes.
*/
-public class CompMemTrie {
+public class MemoryTrie {
@SuppressWarnings("serial")
public static class NotFound extends Exception {};
private static NotFound notfound = new NotFound();
protected TrieNode root;
- public CompMemTrie() {
+ public MemoryTrie() {
this.root = null;
}
@@ -72,7 +73,7 @@
if (lhsOff < 0 || rhsOff < 0) {
return false;
}
- if (lhs.size() < lhsOff + len || rhs.size() < rhsOff + len) {
+ if (lhs.dataSize() < lhsOff + len || rhs.dataSize() < rhsOff + len) {
return false;
}
@@ -92,12 +93,12 @@
public TrieBranch(DataEntry entry) {
this.children = new ByteMap<TrieNode>();
this.term = new TrieLeaf(entry);
- this.offset = term.entry.size();
+ this.offset = term.entry.dataSize();
this.aLeaf = this.term;
}
- public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
- this(oldRoot, new TrieLeaf(key, value));
+ public TrieBranch(TrieNode oldRoot, DataEntry entry) {
+ this(oldRoot, new TrieLeaf(entry));
}
private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
@@ -116,8 +117,9 @@
DataEntry lhs = oldNode.aLeaf.entry.rewind();
DataEntry rhs = newNode.entry.rewind();
+ // FIXME: Replace this loop with a compareTo.
int i = 0;
- while (i < lhs.size() && i < rhs.size()) {
+ while (i < lhs.dataSize() && i < rhs.dataSize()) {
byte lb = lhs.get();
byte rb = rhs.get();
if (lb != rb) {
@@ -130,11 +132,11 @@
i++;
}
- if (i < lhs.size()) {
+ if (i < lhs.dataSize()) {
offset = i;
children.insert(lhs.get(), oldNode);
term = newNode;
- } else if (i < rhs.size()) {
+ } else if (i < rhs.dataSize()) {
if (oldNode instanceof TrieLeaf) {
offset = i;
children.insert(rhs.get(), newNode);
@@ -152,7 +154,7 @@
return false;
} else {
// new node matches the lcp of this node.
- if (node.entry.size() == offset) {
+ if (node.entry.dataSize() == offset) {
// new node is expected to terminate here.
if (term == null) {
term = node;
@@ -168,7 +170,7 @@
} else {
// there is an existing child node branching on this branching key.
if (!child.insert(node, offset)) {
- children.insert(node.entry(offset), new TrieBranch(child, node));
+ children.insert(node.entry.get(offset), new TrieBranch(child, node));
}
}
}
@@ -185,7 +187,7 @@
} else {
// new node matches the lcp of this node.
TrieNode child;
- if (key.size() == offset) {
+ if (key.dataSize() == offset) {
// new node is expected to terminate here.
if (term != null) {
valid[0] = true;
@@ -221,9 +223,9 @@
}
protected boolean insert(TrieLeaf node, int parentLcp) {
- if (entry.size() != node.entry.size()) {
+ if (entry.dataSize() != node.entry.dataSize()) {
return false;
- } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, key.size() - parentLcp)) {
+ } else if (!regionMatches(entry, parentLcp, node.entry, parentLcp, node.entry.dataSize() - parentLcp)) {
return false;
} else if (entry.getValue() == node.entry.getValue()) {
return true; // Duplicate key/value pair.
@@ -233,13 +235,13 @@
}
protected long lookup(DataEntry key, int parentLcd, boolean[] valid) {
- assert regionMatches(this.entry, 0, key, 0, key.size());
+ assert regionMatches(this.entry, 0, key, 0, key.dataSize());
valid[0] = true;
- return value;
+ return entry.getValue();
}
public String toString() {
- return "Trie-LEAF: " + entry + " -> " + value;
+ return "Trie-LEAF: " + entry;
}
}
}
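
On the FIXME in TrieBranch above: the byte-at-a-time loop computes the length
of the longest common prefix of two entries, so the suggested compareTo could
reduce to something like this hypothetical helper (it assumes only the
abstract DataEntry interface declared earlier; the branch constructor would
still need the two bytes at the point of divergence):

    // Length of the longest common prefix of two entries.
    static int commonPrefixLength(DataEntry lhs, DataEntry rhs) {
      int limit = Math.min(lhs.dataSize(), rhs.dataSize());
      int i = 0;
      while (i < limit && lhs.get(i) == rhs.get(i)) {
        i++;
      }
      return i;
    }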
Copied: projects/xa2/object-pool/src/trie/MemoryTrieTest.java (from rev 872, projects/xa2/object-pool/src/trie/CompMemTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrieTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/MemoryTrieTest.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -0,0 +1,413 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th May 2008
+ */
+package trie;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+import data.DataEntry;
+
+/**
+ * Basic tests of the MemoryTrie
+ */
+public class MemoryTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithFile(new File("../scratch/propernames.uniq"), 0);
+ testWithFile(new File("../scratch/propernames.uniq"), 1);
+ testWithFile(new File("../scratch/propernames.uniq"), 10);
+ testWithFile(new File("../scratch/propernames.uniq"), 100);
+ testWithFile(new File("../scratch/propernames.uniq"), 1000);
+ testWithFile(new File("../scratch/propernames.uniq"), 10000);
+ testWithFile(new File("../scratch/connectives.uniq"), 10000);
+ testWithFile(new File("../scratch/web2a.uniq"), 100000);
+ testWithFile(new File("../scratch/web2.uniq"), 1000000);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+ testWithRandomFile(new File("/dev/urandom"));
+ testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+
+ public static void testWithFile(File file, int max) throws Exception {
+ Set<DataEntry> namesSet = new HashSet<DataEntry>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ while (name != null && n < max) {
+ DataEntry entry = DataEntry.getEntry(name.getBytes(), n);
+ namesSet.add(entry);
+ namesTrie.insert(entry);
+ name = names.readLine();
+ n++;
+ }
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ System.out.println("Checking " + n + " lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+ for (DataEntry key : namesSet) {
+ if (namesTrie.lookup(key) != key.getValue()) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+ }
+
+ public static void testWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < 5000) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < 10; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+    int i = 0;
+    for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+    int i = 0;
+    for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+          _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+    int i = 0;
+    for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
Modified: projects/xa2/object-pool/src/trie/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/trie/OnesTable.java 2008-05-09 05:45:42 UTC (rev 913)
+++ projects/xa2/object-pool/src/trie/OnesTable.java 2008-05-12 06:45:18 UTC (rev 914)
@@ -46,7 +46,7 @@
static {
try {
- FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
+ FileChannel fc = new FileInputStream("trie/OnesTable.dat").getChannel();
shortOnesTable = new byte[64*1024];
fc.read(ByteBuffer.wrap(shortOnesTable));
onesTable = new byte[64*1024 * 2*256];