[Mulgara-svn] r1055 - in projects/xa2/object-pool: . src src/data src/index src/scheduler src/trie
andrae at mulgara.org
Mon Jul 7 05:46:47 UTC 2008
Author: andrae
Date: 2008-07-06 22:46:46 -0700 (Sun, 06 Jul 2008)
New Revision: 1055
Added:
projects/xa2/object-pool/src/index/
projects/xa2/object-pool/src/index/IndexFile.java
projects/xa2/object-pool/src/scheduler/BlockCache.java.bak
projects/xa2/object-pool/src/scheduler/FileHandleCMap.java.bak
projects/xa2/object-pool/src/trie/CompBlockTrie.java.bak
projects/xa2/object-pool/src/trie/CompBlockTrieTest.java.bak
projects/xa2/object-pool/src/trie/CompMemTrie.java.bak
projects/xa2/object-pool/src/trie/CompMemTrieTest.java.bak
projects/xa2/object-pool/src/trie/DiskTrieTest.java
projects/xa2/object-pool/src/trie/MemoryTrieUnitTest.java
projects/xa2/object-pool/src/trie/Pair.java
Removed:
projects/xa2/object-pool/src/scheduler/BlockCache.java
projects/xa2/object-pool/src/scheduler/FileHandleCMap.java
projects/xa2/object-pool/src/trie/CompBlockTrie.java
projects/xa2/object-pool/src/trie/CompBlockTrieTest.java
projects/xa2/object-pool/src/trie/CompMemTrie.java
projects/xa2/object-pool/src/trie/CompMemTrieTest.java
projects/xa2/object-pool/src/trie/MemoryTrieTest.java
Modified:
projects/xa2/object-pool/build.sh
projects/xa2/object-pool/build.xml
projects/xa2/object-pool/src/data/DataEntry.java
projects/xa2/object-pool/src/data/DataFile.java
projects/xa2/object-pool/src/scheduler/Block.java
projects/xa2/object-pool/src/scheduler/FileHandle.java
projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
projects/xa2/object-pool/src/scheduler/IOScheduler.java
projects/xa2/object-pool/src/trie/ByteMap.java
projects/xa2/object-pool/src/trie/ByteMapTest.java
projects/xa2/object-pool/src/trie/DiskTrie.java
projects/xa2/object-pool/src/trie/OnesTable.java
Log:
Bringing subversion up to date with my current sandbox.
Modified: projects/xa2/object-pool/build.sh
===================================================================
--- projects/xa2/object-pool/build.sh 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/build.sh 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,4 +1,4 @@
#!/bin/sh
unset ANT_HOME
-CLASSPATH=$CLASSPATH:lib/ant.jar:lib/ant-junit.jar:lib/ant-launcher.jar lib/ant $@
+CLASSPATH=lib/ant.jar:lib/ant-junit.jar:lib/ant-launcher.jar:lib/junit-4.4.jar:$CLASSPATH lib/ant $@
Modified: projects/xa2/object-pool/build.xml
===================================================================
--- projects/xa2/object-pool/build.xml 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/build.xml 2008-07-07 05:46:46 UTC (rev 1055)
@@ -35,7 +35,7 @@
<target name="test" depends="compile" description="Run the unit tests">
<delete dir="${test}"/>
<mkdir dir="${test}"/>
- <junit printsummary="yes" haltonfailure="no" showoutput="yes">
+ <junit printsummary="yes" haltonfailure="no" showoutput="no" fork="yes" maxmemory="512m">
<classpath>
<pathelement path="${build}"/>
<pathelement location="/Developer/Java/Ant/lib/ant-junit.jar"/>
Modified: projects/xa2/object-pool/src/data/DataEntry.java
===================================================================
--- projects/xa2/object-pool/src/data/DataEntry.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/data/DataEntry.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -47,11 +47,19 @@
return new MemoryBackedDataEntry(value, data, false);
}
+ public static DataEntry getEntry(byte[] data, long value, boolean copy) {
+ return new MemoryBackedDataEntry(value, data, copy);
+ }
+
public static DataEntry getEntry(FileHandle handle, long position) throws IOException {
return new DiskBackedDataEntry(handle, position).canonicalize();
}
+ public static DataEntry getKey(byte[] data) {
+ return new MemoryBackedKey(data, false);
+ }
+
protected static class DiskBackedDataEntry extends DataEntry {
private FileHandle handle;
private long position;
@@ -60,9 +68,11 @@
// FIXME: Need to replace this with a background populated ConcurrentLinkedQueue.
private Block[] blocks;
+ // Note: In the case of the 0th buffer this is slice()'d to avoid having to deal with data.
private ByteBuffer currBuffer;
private int currBlock;
private int currPosition;
+ private int dataStart;
DiskBackedDataEntry(FileHandle handle, long position) throws IOException {
this.handle = handle;
@@ -70,23 +80,56 @@
int blocksize = 0x01 << handle.getLog2BlockSize();
- Block block = handle.readBlock(position);
+ System.err.println("Position: " + position + " blocksize: " + blocksize + " log2bs: " + handle.getLog2BlockSize());
+ System.err.println("Reading block: " + (position >> handle.getLog2BlockSize()));
+ Block block = handle.readBlock(position >> handle.getLog2BlockSize());
+ if (block == null) {
+ throw new IllegalArgumentException("Unable to read block for position");
+ }
this.currBlock = 0;
+ System.err.println("Reading buffer: " + ((int)(position & (blocksize - 1))));
this.currBuffer = block.offset((int)(position & (blocksize - 1)));
this.currPosition = 0;
this.value = currBuffer.getLong();
this.length = currBuffer.getInt();
- currBuffer.mark();
+ System.err.printf("Value: %d, Length: %d\n", value, length);
+
+ this.dataStart = currBuffer.position();
+ currBuffer = currBuffer.slice();
+
// Set the buffer to the entire block for length calculations.
currBuffer.limit(currBuffer.capacity());
int remaining = length - currBuffer.remaining();
- int totalBlocks = remaining / blocksize + (remaining % blocksize > 0 ? 1 : 0);
+ // If remaining <= 0 then this block contains all the data required for this entry.
+ // Otherwise this entry is spread over multiple blocks, so we setup an array to support lazy-loading of
+ // these blocks.
+ System.err.printf("Setting totalBlocks to 1\n");
+ int totalBlocks = 1;
+ System.err.printf("remaining: %d, blocksize: %d\n", remaining, blocksize);
+ if (remaining > 0) {
+ System.err.printf("Calculating extra totalBlocks\n");
+ totalBlocks += remaining / blocksize;
+ System.err.printf("totalBlocks: %d\n", totalBlocks);
+ if ((remaining % blocksize) > 0) {
+ System.err.printf("Incrementing totalBlocks\n");
+ totalBlocks++;
+ System.err.printf("totalBlocks: %d\n", totalBlocks);
+ }
+ }
+
+// int totalBlocks = (remaining <= 0) ? 1 : remaining / blocksize + (((remaining % blocksize) > 0) ? 1 : 0);
+ System.err.printf("remaining / blocksize: %d, remaining %% blocksize: %d\n", (remaining / blocksize), (remaining % blocksize));
+ System.err.printf("remaining: %d, blocksize: %d, totalBlocks: %d\n", remaining, blocksize, totalBlocks);
+ System.err.printf("currBuffer.capacity: %d, currBuffer.position: %d, currBuffer.limit: %d\n",
+ currBuffer.capacity(), currBuffer.position(), currBuffer.limit());
+ assert totalBlocks > 0;
+
blocks = new Block[totalBlocks];
blocks[0] = block;
if (totalBlocks == 1) {
- // [mark,length] establishes the data covered by this entry.
+ // [dataStart,length] establishes the data covered by this entry.
currBuffer.limit(length);
}
// FIXME: We eventually want to submit the remaining blocks to the scheduler for speculative background fetch.
@@ -104,7 +147,10 @@
*/
public DataEntry canonicalize() {
if (blocks.length == 1) {
- byte[] data = new byte[currBuffer.reset().remaining()];
+ System.err.println("dataStart = " + dataStart);
+ System.err.println("currBuffer.capacity: " + currBuffer.capacity());
+ System.err.println("currBuffer.limit: " + currBuffer.limit());
+ byte[] data = new byte[currBuffer.position(0).remaining()];
currBuffer.get(data);
return new MemoryBackedDataEntry(value, data);
} else {
@@ -117,21 +163,17 @@
}
public DataEntry rewind() {
- if (currBlock == 0) {
- currBuffer.rewind();
+ currBlock = 0;
+ // block size is a power of 2 therefore mask is size - 1.
+ currBuffer = blocks[0].offset(dataStart);
+ if (blocks.length == 1) {
+ // [mark,length] establishes the data covered by this entry.
+ // Note: This should never occur as we expect DataEntries to be canonicalized, which would result in
+ // a 1 block entry being replaced by a MemoryBackedDataEntry.
+ // Note currBuffer has been slice()'d, so limit can be set to length directly.
+ currBuffer.limit(length);
} else {
- currBlock = 0;
- // block size is a power of 2 therefore mask is size - 1.
- currBuffer = blocks[0].offset((int)(position & (blocks[0].getBlockSize() - 1)) + HEADER_LENGTH);
- currBuffer.mark();
- if (blocks.length == 1) {
- // [mark,length] establishes the data covered by this entry.
- // Note: This should never occur as we expect DataEntries to be canonicalized, which would result in
- // a 1 block entry being replaced by a MemoryBackedDataEntry.
- currBuffer.limit(length);
- } else {
- currBuffer.limit(currBuffer.capacity());
- }
+ currBuffer.limit(currBuffer.capacity());
}
return this;
@@ -151,19 +193,31 @@
if (off >= length) throw new BufferOverflowException();
int blocksize = (0x01 << handle.getLog2BlockSize());
- currBlock = (off + HEADER_LENGTH) / blocksize;
+ currBlock = (off + dataStart) / blocksize;
+
if (blocks[currBlock] == null) {
blocks[currBlock] = handle.readBlock(position + currBlock * blocksize);
}
- currBuffer = blocks[currBlock].offset((int)(position & (blocksize - 1))); // block size is a power of 2 therefore mask is size - 1.
- // Allow for header.
if (currBlock == 0) {
- currBuffer.position(HEADER_LENGTH);
+ currBuffer = blocks[currBlock].offset(dataStart);
+ currBuffer.position(off);
+ } else {
+ // Offset - (amount of data in 0th block) - (blocksize*number-of-skipped blocks).
+ int blockOffset = off - (blocksize - dataStart) - (blocksize * (currBlock - 1));
+ currBuffer = blocks[currBlock].offset(blockOffset);
}
+
// Allow for partial final block.
if (currBlock == blocks.length - 1) {
- currBuffer.limit((int)(length % blocksize + (position & (blocksize - 1))));
+ // if length fits within the first block:
+ if (length - (blocksize - dataStart) <= 0) {
+ currBuffer.limit(length);
+ } else {
+ int remaining = (length - (blocksize - dataStart));
+ currBuffer.limit(remaining % blocksize);
+
+ }
}
currPosition = off;
@@ -195,7 +249,7 @@
protected static class MemoryBackedDataEntry extends DataEntry {
- private ByteBuffer data;
+ protected ByteBuffer data;
MemoryBackedDataEntry(long value, byte[] data) {
this(value, data, true);
@@ -240,7 +294,8 @@
bb.clear();
bb.putLong(value);
bb.putInt(data.capacity());
- return handle.writeBuffers(new ByteBuffer[] { (ByteBuffer)bb.flip(), (ByteBuffer)bb.duplicate().clear() });
+// System.err.printf("Writing value %d, length %d\n", value, data.capacity());
+ return handle.writeBuffers(new ByteBuffer[] { (ByteBuffer)bb.flip(), (ByteBuffer)data.duplicate().clear() }, HEADER_LENGTH);
}
public ByteBuffer slice(int offset, int length) {
@@ -271,4 +326,44 @@
return Long.valueOf(value).hashCode() ^ data.duplicate().clear().hashCode();
}
}
+
+ protected static class MemoryBackedKey extends MemoryBackedDataEntry {
+ MemoryBackedKey(byte[] data) {
+ this(data, true);
+ }
+
+ MemoryBackedKey(byte[] data, boolean copyData) {
+ // Use dummy value.
+ super(-1, data, copyData);
+ }
+
+ public boolean equals(Object rhs) {
+ if (rhs == this) {
+ return true;
+ }
+ if (rhs.getClass().equals(getClass())) {
+ MemoryBackedKey mbde = (MemoryBackedKey)rhs;
+ return data.duplicate().clear().equals(mbde.data.duplicate().clear());
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ return data.duplicate().clear().hashCode();
+ }
+
+ public long write(FileHandle handle) throws IOException {
+ throw new UnsupportedOperationException("Attempt to write Key without Value");
+ }
+
+ public long getValue() {
+ throw new UnsupportedOperationException("Attempt to obtain value from naked Key");
+ }
+
+ public int totalSize() {
+ throw new UnsupportedOperationException("Attempt to obtain materialization details from Key");
+ }
+ }
+
}
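
[Editor's sketch] The block arithmetic introduced above is fiddly, so here is a minimal standalone sketch of the two calculations (an illustration only, not code from this commit; dataStart is the header size consumed in block 0, blocksize is a power of two):

    // Number of blocks an entry spans: block 0 holds (blocksize - dataStart)
    // bytes of data; any remainder spills into ceil(remaining / blocksize)
    // further blocks.
    static int totalBlocks(int length, int dataStart, int blocksize) {
      int remaining = length - (blocksize - dataStart);
      if (remaining <= 0) return 1;  // entry fits entirely within block 0
      return 1 + remaining / blocksize + ((remaining % blocksize) > 0 ? 1 : 0);
    }

    // Offset within block currBlock (> 0) of logical data offset 'off':
    // subtract the data held in block 0 and in the (currBlock - 1) skipped blocks.
    static int blockOffset(int off, int currBlock, int dataStart, int blocksize) {
      return off - (blocksize - dataStart) - blocksize * (currBlock - 1);
    }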
Modified: projects/xa2/object-pool/src/data/DataFile.java
===================================================================
--- projects/xa2/object-pool/src/data/DataFile.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/data/DataFile.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -17,11 +17,12 @@
public static final int MAGIC = 0x00030002;
public static final int LOG2_BLOCK_SIZE = 15;
public static final int BLOCK_SIZE = 0x01 << LOG2_BLOCK_SIZE;
+ public static final String SUFFIX = ".dat";
private FileHandle handle;
- public DataFile(String filename, IOScheduler scheduler) throws IOException {
- this.handle = scheduler.open(filename, MAGIC, LOG2_BLOCK_SIZE);
+ public DataFile(String prefix, IOScheduler scheduler) throws IOException {
+ this.handle = scheduler.open(prefix + SUFFIX, MAGIC, LOG2_BLOCK_SIZE);
}
public DataEntry getEntry(long position) throws IOException {
@@ -31,4 +32,12 @@
public long write(DataEntry entry) throws IOException {
return entry.write(handle);
}
+
+ public void complete() throws IOException {
+ handle.complete();
+ }
+
+ public void close() throws IOException {
+ handle.close();
+ }
}
Added: projects/xa2/object-pool/src/index/IndexFile.java
===================================================================
--- projects/xa2/object-pool/src/index/IndexFile.java (rev 0)
+++ projects/xa2/object-pool/src/index/IndexFile.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,68 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 14th May 2008
+ */
+package index;
+
+import java.io.IOException;
+
+import data.DataFile;
+import scheduler.Block;
+import scheduler.FileHandle;
+import scheduler.IOScheduler;
+import trie.DiskTrie;
+
+/**
+ * Abstracts the concept of a trie-based index of octet sequences.
+ */
+public class IndexFile {
+ public static final int MAGIC = 0x00050004;
+ public static final int LOG2_BLOCK_SIZE = 15;
+ public static final int BLOCK_SIZE = 0x01 << LOG2_BLOCK_SIZE;
+ public static final String SUFFIX = ".idx";
+
+ private FileHandle handle;
+ private DataFile data;
+
+ public IndexFile(String prefix, IOScheduler scheduler) throws IOException {
+ this.handle = scheduler.open(prefix + SUFFIX, MAGIC, LOG2_BLOCK_SIZE);
+ this.data = new DataFile(prefix, scheduler);
+ }
+
+ public DiskTrie readEntry(long blockId) throws IOException {
+ return new DiskTrie(handle.readBlock(blockId), data);
+ }
+
+ public DiskTrie readEntry() throws IOException {
+ Block block = handle.readBlock();
+ if (block != null) {
+ return new DiskTrie(block, data);
+ } else {
+ return null;
+ }
+ }
+
+ public DiskTrie createEntry() throws IOException {
+ return new DiskTrie(new Block(handle, LOG2_BLOCK_SIZE));
+ }
+
+ // FIXME: Handle read vs. write here. Also check to make sure this trie came from this file.
+ public void write(DiskTrie trie) throws IOException, DiskTrie.InsufficientSpace {
+ trie.write(data);
+ }
+
+ public void complete() throws IOException {
+ handle.complete();
+ data.complete();
+ }
+
+ public void close() throws IOException {
+ handle.close();
+ data.close();
+ }
+
+ public String toString() {
+ return "Trie-index-file[" + handle.toString() + "]";
+ }
+}
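
[Editor's sketch] Since readEntry() returns null at end-of-file, sequentially scanning an index file presumably looks something like the following. This is a hypothetical usage sketch: the IOScheduler construction is assumed from the fields visible elsewhere in this commit, and the directory and prefix names are placeholders.

    IOScheduler scheduler = new IOScheduler(new File("store"), new FileHandleFactory());
    IndexFile index = new IndexFile("example", scheduler);
    try {
      DiskTrie trie;
      while ((trie = index.readEntry()) != null) {  // null signals end-of-file
        // ... consult the trie ...
      }
    } finally {
      index.close();  // closes both the .idx handle and the backing .dat file
    }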
Modified: projects/xa2/object-pool/src/scheduler/Block.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/Block.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/Block.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -19,7 +19,9 @@
public Block(FileHandle handle, int log2bs) {
this.handle = handle;
this.blockId = null;
- this.buffer.allocateDirect(0x01 << log2bs);
+ this.buffer = ByteBuffer.allocateDirect(0x01 << log2bs);
+ System.err.println("Allocating block with capacity: " + buffer.capacity() + " in file: " + handle.getFile());
+ new Throwable().printStackTrace();
this.blockId = UNALLOCATED;
}
@@ -36,8 +38,6 @@
}
public Long getBlockId() {
- if (blockId == UNALLOCATED) throw new IllegalStateException("Attempt to obtain blockId for uninitialized block");
-
return blockId;
}
@@ -46,6 +46,8 @@
* Sets the Buffer's position = 0; limit = size; and stores the blockId.
*/
public ByteBuffer prepare(Long id) {
+ System.err.println("Setting block-id to : " + id + " for file: " + handle.getFile());
+ new Throwable().printStackTrace();
blockId = id;
return (ByteBuffer)(buffer.clear());
}
Deleted: projects/xa2/object-pool/src/scheduler/BlockCache.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/BlockCache.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/BlockCache.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,61 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 30th April 2008
- */
-package scheduler;
-
-import java.io.File;
-import java.ref.SoftReference;
-import java.util.WeakHashMap;
-
-public class BlockCache {
- private FileHandle handle;
- // The Block object is required to maintain a strong reference to the Long key obj.
- private WeakHashMap<Long, SoftReference<Block>> blockCache;
- private Queue<SoftReference<Block> blockPool;
-
- private static final int LOG2_BLOCK_SIZE = 21;
-
- BlockCache(FileHandle handle, Queue<SoftReference<Block>> blockPool) {
- this.handle = handle;
- this.blockCache = new WeakHashMap<Long, SoftReference<Block>>();
- this.blockPool = blockPool;
- }
-
- public Block getBlock(long position) {
- Long pos = new Long(position >> LOG2_BLOCK_SIZE);
- SoftReference<Block> blockRef = blockCache.get(pos);
- if (blockRef != null) {
- Block block = blockRef.get();
- if (block != null) {
-
- return block;
- } else {
- // Note that the combination of soft and weak references means that we need this double check.
- // Because soft-references prevent weak-references from being reaped it is legal for the gc to reap
- // the block and the key in subsequent passes.
- // Here we remove the stale entry so it can be reentered.
- blockCache.remove(pos);
- }
- }
-
- SoftReference newBlockRef;
- Block newBlock = null;
- while ((newBlockRef = blockPool.pool()) != null) {
- if ((newBlock = newBlockRef.get()) != null) {
- break;
- }
- }
- if (newBlock == null) {
- newBlock = new Block(handle, 0x01 << LOG2_BLOCK_SIZE);
- }
-
- handle.readBlock(pos >> LOG2_BLOCK_SIZE, newBlock);
- // Note we use the newBlock's position, not pos - this ensures the map's entry will be retained until
- // memory pressure forces the SoftReference to be reaped.
- blockCache.put(newBlock.getBlockId(), new SoftReference(newBlock));
-
- return newBlock;
- }
-}
Copied: projects/xa2/object-pool/src/scheduler/BlockCache.java.bak (from rev 915, projects/xa2/object-pool/src/scheduler/BlockCache.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/BlockCache.java.bak (rev 0)
+++ projects/xa2/object-pool/src/scheduler/BlockCache.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,61 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.io.File;
+import java.ref.SoftReference;
+import java.util.WeakHashMap;
+
+public class BlockCache {
+ private FileHandle handle;
+ // The Block object is required to maintain a strong reference to the Long key obj.
+ private WeakHashMap<Long, SoftReference<Block>> blockCache;
+ private Queue<SoftReference<Block> blockPool;
+
+ private static final int LOG2_BLOCK_SIZE = 21;
+
+ BlockCache(FileHandle handle, Queue<SoftReference<Block>> blockPool) {
+ this.handle = handle;
+ this.blockCache = new WeakHashMap<Long, SoftReference<Block>>();
+ this.blockPool = blockPool;
+ }
+
+ public Block getBlock(long position) {
+ Long pos = new Long(position >> LOG2_BLOCK_SIZE);
+ SoftReference<Block> blockRef = blockCache.get(pos);
+ if (blockRef != null) {
+ Block block = blockRef.get();
+ if (block != null) {
+
+ return block;
+ } else {
+ // Note that the combination of soft and weak references means that we need this double check.
+ // Because soft-references prevent weak-references from being reaped it is legal for the gc to reap
+ // the block and the key in subsequent passes.
+ // Here we remove the stale entry so it can be reentered.
+ blockCache.remove(pos);
+ }
+ }
+
+ SoftReference newBlockRef;
+ Block newBlock = null;
+ while ((newBlockRef = blockPool.pool()) != null) {
+ if ((newBlock = newBlockRef.get()) != null) {
+ break;
+ }
+ }
+ if (newBlock == null) {
+ newBlock = new Block(handle, 0x01 << LOG2_BLOCK_SIZE);
+ }
+
+ handle.readBlock(pos >> LOG2_BLOCK_SIZE, newBlock);
+ // Note we use the newBlock's position, not pos - this ensures the map's entry will be retained until
+ // memory pressure forces the SoftReference to be reaped.
+ blockCache.put(newBlock.getBlockId(), new SoftReference(newBlock));
+
+ return newBlock;
+ }
+}
Modified: projects/xa2/object-pool/src/scheduler/FileHandle.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandle.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/FileHandle.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -18,19 +18,22 @@
private FileChannel channel;
private int log2bs;
+ private final int blocksize;
private long seeks;
FileHandle(File file, FileChannel channel, int magic, int log2bs) {
this.file = file;
this.channel = channel;
this.log2bs = log2bs;
+ this.blocksize = 0x01 << log2bs;
this.seeks = 0;
}
FileHandle(File file, FileOutputStream stream, int magic, int log2bs) throws IOException {
this(file, stream.getChannel(), magic, log2bs);
- ByteBuffer bb = ByteBuffer.allocate(0x01 << log2bs);
+ ByteBuffer bb = ByteBuffer.allocate(blocksize);
+ System.err.println("Allocated header: " + bb.capacity());
bb.putInt(magic);
bb.putInt(log2bs);
bb.position(0);
@@ -44,7 +47,7 @@
if (log2bs < 10) throw new IllegalArgumentException("Attempt to open file with blocksize < 1KB");
- ByteBuffer bb = ByteBuffer.allocate(0x01 << log2bs);
+ ByteBuffer bb = ByteBuffer.allocate(blocksize);
this.channel.read(bb);
bb.clear();
@@ -75,25 +78,35 @@
}
/**
- * FIXME: This should be package scope.
+ * FIXME: These should be package scope.
*/
- public Block readBlock(Long blockId) throws IOException {
- long position = blockId.longValue();
- if (channel.position() != position) {
+ public Block readBlock(long blockId) throws IOException {
+ // blockId + 1 allows higher levels to ignore the metadata stored in block-0.
+ System.err.println("Reading block: " + blockId + " from File: " + file);
+ if (channel.position() != ((blockId + 1) << log2bs)) {
seeks++;
- channel.position(position);
+ channel.position((blockId + 1) << log2bs);
}
+ return readBlock();
+ }
+
+ public Block readBlock() throws IOException {
Block block = new Block(this, log2bs);
- channel.read(block.prepare(blockId));
- return block;
+ System.err.println("Reading block from position: " + channel.position());
+ // Block::prepare takes a blockId, we can calculate this from the channel's position.
+ int read = channel.read(block.prepare((channel.position() >> log2bs) - 1));
+ if (read == blocksize) return block;
+ if (read == -1) return null;
+ throw new IllegalStateException("Partial block read");
}
long writeBlock(Block block) throws IOException {
- long position = channel.position();
+ long position = channel.position() - blocksize;
- channel.write(block.prepare(new Long(position)));
+ System.err.printf("Writing block to position: %d in file: %s\n", channel.position(), file.toString());
+ channel.write(block.prepare(position >> log2bs));
return position;
}
@@ -101,9 +114,17 @@
/**
* FIXME: Should be package scope.
*/
- public long writeBuffers(ByteBuffer[] buffers) throws IOException {
- long position = channel.position();
+ public long writeBuffers(ByteBuffer[] buffers, int headerSize) throws IOException {
+ long currBlock = channel.position() / blocksize;
+ long hEndBlock = (channel.position() + headerSize) / blocksize;
+ if (currBlock != hEndBlock) {
+ channel.write(ByteBuffer.wrap(new byte[(int)(channel.position() % blocksize)]));
+ channel.position((currBlock + 1) * blocksize);
+ }
+ long position = channel.position() - blocksize;
+// System.err.printf("Writing %d buffers to position: %d in file: %s\n", buffers.length, channel.position(), file.toString());
+
channel.write(buffers);
return position;
@@ -113,6 +134,32 @@
* FIXME: Should be package scope.
*/
public void transferTo(FileHandle handle, long position, int length) throws IOException {
- channel.transferTo(position, length, handle.channel);
+ System.err.println("Transfering data");
+ channel.transferTo(position + blocksize, length, handle.channel);
}
+
+ public void complete() throws IOException {
+ if (channel.position() % blocksize != 0) {
+ System.err.println("Writing data: " + (int)(blocksize - channel.position() % blocksize) + " to file: " + file);
+ System.err.println(" to position: " + channel.position());
+ System.err.println(" pos % blksz: " + (channel.position() % blocksize));
+ System.err.println(" with blocksize: " + blocksize);
+ channel.write(ByteBuffer.allocate((int)(blocksize - channel.position() % blocksize)));
+ }
+ channel.force(true);
+ }
+
+ public void close() throws IOException {
+ System.err.println("Closing file: " + file);
+ channel.close();
+ }
+
+ public String toString() {
+ long position = -1;
+ try {
+ position = channel.position();
+ } catch (Exception e) {}
+
+ return file.toString() + ":" + position + ":" + log2bs + ":" + seeks;
+ }
}
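
[Editor's sketch] The +1/-1 block adjustments above all stem from block 0 holding the file's magic/blocksize header; the mapping between logical block ids and file offsets reduces to the following (an illustration of the arithmetic in the diff, not code from this commit):

    // Logical block ids skip the metadata stored in block 0.
    static long fileOffsetOf(long blockId, int log2bs) {
      return (blockId + 1) << log2bs;           // where readBlock(long) seeks to
    }
    static long blockIdAt(long channelPosition, int log2bs) {
      return (channelPosition >> log2bs) - 1;   // how readBlock() recovers the id
    }

    // writeBuffers(..., headerSize) pads to the next block boundary whenever the
    // headerSize-byte header would otherwise straddle two blocks:
    static boolean headerStraddles(long position, int headerSize, int blocksize) {
      return position / blocksize != (position + headerSize) / blocksize;
    }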
Deleted: projects/xa2/object-pool/src/scheduler/FileHandleCMap.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleCMap.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/FileHandleCMap.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,30 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 30th April 2008
- */
-package scheduler;
-
-import java.io.File;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * A canonical mapping of File objects to FileHandles.
- */
-public class FileHandleCMap {
- private Map<File, FileHandle> handles;
-
- public FileHandleCMap() {
- this.handles = new HashMap<File, FileHandle>();
- }
-
- public FileHandle getHandle(File file) {
- FileHandle result = handles.get(file);
- if (result == null) {
- result = new FileHandle(file, this);
- handles.put(result);
- }
- return result;
- }
-}
Copied: projects/xa2/object-pool/src/scheduler/FileHandleCMap.java.bak (from rev 915, projects/xa2/object-pool/src/scheduler/FileHandleCMap.java)
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleCMap.java.bak (rev 0)
+++ projects/xa2/object-pool/src/scheduler/FileHandleCMap.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,30 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 30th April 2008
+ */
+package scheduler;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A canonical mapping of File objects to FileHandles.
+ */
+public class FileHandleCMap {
+ private Map<File, FileHandle> handles;
+
+ public FileHandleCMap() {
+ this.handles = new HashMap<File, FileHandle>();
+ }
+
+ public FileHandle getHandle(File file) {
+ FileHandle result = handles.get(file);
+ if (result == null) {
+ result = new FileHandle(file, this);
+ handles.put(result);
+ }
+ return result;
+ }
+}
Modified: projects/xa2/object-pool/src/scheduler/FileHandleFactory.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/FileHandleFactory.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/FileHandleFactory.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -17,11 +17,11 @@
public FileHandleFactory() {}
public FileHandle createNew(File file, int magic, int blocksize) throws IOException {
- if (file.exists()) {
- throw new IllegalArgumentException("File exists: " + file);
+ if (!file.createNewFile()) {
+ throw new IllegalArgumentException("File already exists: " + file);
}
- return new FileHandle(file, new FileInputStream(file), magic, blocksize);
+ return new FileHandle(file, new FileOutputStream(file), magic, blocksize);
}
public FileHandle openExisting(File file, int magic, int blocksize) throws IOException {
@@ -29,7 +29,7 @@
throw new IllegalArgumentException("File does not exist: " + file);
}
- return new FileHandle(file, new FileOutputStream(file), magic, blocksize);
+ return new FileHandle(file, new FileInputStream(file), magic, blocksize);
}
public FileHandle open(File file, int magic, int blocksize) throws IOException {
Modified: projects/xa2/object-pool/src/scheduler/IOScheduler.java
===================================================================
--- projects/xa2/object-pool/src/scheduler/IOScheduler.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/scheduler/IOScheduler.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -19,8 +19,7 @@
if (!dir.isDirectory()) {
throw new IllegalArgumentException("Scheduler argument not directory: " + dir);
}
- }
- if (!dir.mkdirs()) {
+ } else if (!dir.mkdirs()) {
throw new IllegalStateException("Failed to make directory: " + dir);
}
}
@@ -28,6 +27,8 @@
public FileHandle open(String name, int magic, int blocksize) throws IOException {
File file = new File(dir, name);
+ System.err.println("Opening: " + file);
+
return fileHandleFactory.open(file, magic, blocksize);
}
}
Modified: projects/xa2/object-pool/src/trie/ByteMap.java
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMap.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/ByteMap.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -171,4 +171,14 @@
block.putShort(low[i]);
}
}
+
+ public static ByteMap<T> merge(ByteMap<T> lhs, ByteMap<T> rhs) {
+ for (short highI = 0x0000; highI <= 0x8000; highI = highI << 1) {
+ if (lhs.high & highI) {
+ if (rhs.high & highI) {
+ }
+ } else if (rhs.high & highI) {
+ }
+ }
+ }
}
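
[Editor's sketch] The merge() added above is still a stub and does not yet compile (Java has no int-to-boolean conversion, and a short loop index wraps negative before reaching 0x8000). A compiling skeleton for the bit-scan it sketches might look like the following — an illustration under those assumptions, not the author's implementation:

    // Iterate the 16 bits of the 'high' masks with an int index so the loop
    // terminates, and test masked bits explicitly.
    for (int i = 0; i < 16; i++) {
      int highI = 1 << i;
      boolean inLhs = (lhs.high & highI) != 0;
      boolean inRhs = (rhs.high & highI) != 0;
      if (inLhs && inRhs) {
        // merge the two low-words for this bucket
      } else if (inLhs || inRhs) {
        // copy the bucket present on one side only
      }
    }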
Modified: projects/xa2/object-pool/src/trie/ByteMapTest.java
===================================================================
--- projects/xa2/object-pool/src/trie/ByteMapTest.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/ByteMapTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,5 +1,5 @@
+package trie;
-
/**
* Represents a map from a byte to data.
* ie. a compressed 256 element array.
Deleted: projects/xa2/object-pool/src/trie/CompBlockTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrie.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/CompBlockTrie.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,342 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 22nd April 2008
- */
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
-
-/**
- * Extends CompMemTrie to provide IO functionality.
- *
- * Guarantees Trie will fit within a single block, refuses to accept insert() if it would make the
- * serialized size of the node greater than the blocksize.
- */
-public class CompBlockTrie extends CompMemTrie {
- @SuppressWarnings("serial")
- public static class InsufficientSpace extends Exception
- { public InsufficientSpace(String s) { super(s); } };
-
- static final byte TYPE_BRANCH_TERM = 0x01; // Indicates a trie branch with a termination entry..
- static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
- static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
- static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
- public final static int MAGIC = 0x00020001;
- public final static int INVERT_MAGIC = 0x01000200;
-
- // Calculated as the worst case size per entry.
- // Assumes a full leaf and branch per entry.
- // Block: 2-byte type-header
- // 4-byte offset
- // 2-byte ByteMap-High
- // 4-byte ByteMap-Lows (2-entries).
- // 4-byte Children Ptrs.
- // Leaf: 2-byte type-header
- // 8-byte offset.
- // 26-bytes Total.
- // Best case of course is 10-bytes leaf + 2-bytes children in branch.
- private static short WORST_CASE_ENTRY_SIZE = 26;
-//private static short BEST_CASE_ENTRY_SIZE = 12;
-
- /** Worst-case space left in trie measured in entries */
- private short space;
-
- /**
- * The blocksize for this Block-Trie.
- * Currently the maximum block size supported is 32K, which is worst-case:
- * 1260 entries covered by a single I0 index in 32KB.
- * 1.59 million entries by a single I1 index in 40MB.
- * 2.00 billion entries by a single I2 index in 49GB.
- * 2.52 trillion entries by a single I3 index in 60TB.
- */
- private int blockSize;
-
- public CompBlockTrie(int blockSize) {
- super();
- assert blockSize > 1024;
- assert blockSize <= 32*1024;
- this.blockSize = blockSize;
- this.space = (short)((blockSize - 8) / WORST_CASE_ENTRY_SIZE); // -8 leaves room for header info.
- }
-
- public CompBlockTrie(ByteBuffer index, FileChannel data) throws IOException {
- super();
- index.rewind(); // Note sure if I should doing this here or delegating to the caller.
- int magic = index.getInt();
- if (magic != MAGIC) {
- if (magic == INVERT_MAGIC) {
- index.order(index.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
- } else {
- throw new IllegalArgumentException("Bad magic in index buffer: " + magic + ", MAGIC=" + MAGIC);
- }
- }
-
- // I should be able to replace this with a stack.
- // The child->parent relationship is implicit in the ordering of the nodes in the file.
- // The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
- // If treated as a stack once we have read the root the stack should contain only a single element - the
- // root node.
- // For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
- HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
-
- short rootLocation = -1;
- while (rootLocation == -1) {
- short location = (short)index.position();
- byte type = index.get();
- switch (type) {
- case TYPE_BRANCH_TERM:
- nodeMap.put(location, readTrieBranch(index, true, nodeMap));
- break;
- case TYPE_BRANCH_NOTERM:
- nodeMap.put(location, readTrieBranch(index, false, nodeMap));
- break;
- case TYPE_LEAF:
- nodeMap.put(location, readTrieLeaf(index, data));
- break;
- case TYPE_ROOT_LOC:
- index.get();
- rootLocation = index.getShort();
- break;
- default:
- throw new IllegalArgumentException("Invalid trie-node");
- }
- }
-
- root = nodeMap.get(rootLocation);
-
- this.space = 0; // If this gets modified it will be recalculated automaticly then.
- }
-
- public boolean insert(byte[] key, long value) {
- // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
- // trie-block, but if we haven't we should recalculate the space available.
- if (space == 0) {
- int size = totalIndexSize(root);
- space = (short)((blockSize - size - 8) / WORST_CASE_ENTRY_SIZE);
- if (space < 0) {
- throw new IllegalStateException("Overfilled CompBlockTrie");
- } else if (space == 0) {
- return false;
- }
- }
-
- super.insert(key, value);
- space--;
-
- return true;
- }
-
- public void write(ByteBuffer index, FileChannel data) throws InsufficientSpace, IOException {
- if (index.remaining() < blockSize) {
- throw new InsufficientSpace("Insufficient space remaining in buffer to write block");
- }
-
- // Temporarally set the limit to blocksize.
- int limit = index.limit();
- index.limit(index.position() + blockSize);
-
- if (root == null) {
- throw new IllegalStateException("Attempt to write empty trie");
- }
-
- int indexreq = (root == null) ? 8 : totalIndexSize(root) + 4 + 4; // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
- if (indexreq > index.remaining()) {
- System.err.printf("Index-Req:%d ; remaining:%d ; capacity:%d ; limit:%d ; position:%d\n", indexreq,
- index.remaining(), index.capacity(), index.limit(), index.position());
- throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
- }
- if (indexreq > 0x00010000) {
- throw new InsufficientSpace("Attempt to write trie index larger than 64K");
- }
-
- HashMap<TrieNode, Short> locationMap = new HashMap<TrieNode, Short>();
-
- index.putInt(MAGIC);
- if (root != null) {
- writeTrieNode(root, index, data, locationMap);
- }
- index.put(TYPE_ROOT_LOC);
- index.put((byte)0xFF);
- if (root != null) {
- index.putShort(locationMap.get(root));
- } else {
- index.putShort((short)0x0000);
- }
-
- // Set the position to the block size to ensure whole blocks are written.
- index.position(index.limit());
- // Reset the limit to its initial value.
- index.limit(limit);
- }
-
- private static Map<Class, TrieNodeWriter> writerMap = new HashMap<Class, TrieNodeWriter>();
- static {
- writerMap.put(TrieBranch.class, new TrieBranchWriter());
- writerMap.put(TrieLeaf.class, new TrieLeafWriter());
- }
-
- protected static void writeTrieNode(TrieNode node, ByteBuffer index, FileChannel data,
- Map<TrieNode, Short> locationMap) throws IOException {
- writerMap.get(node.getClass()).write(node, index, data, locationMap);
- }
-
- private static interface TrieNodeWriter {
- /**
- * This method *must* update location with the position in the index buffer it was written to.
- * This allows parents to obtain a physical reference to this node in their write methods.
- * The index buffer is the target of the Trie index itself.
- * The data buffer is the target of the byte[]/value pairs stored in the leaves. We want to keep these
- * separate for caching purposes.
- */
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException;
- }
-
- private static class TrieBranchWriter implements TrieNodeWriter {
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
- TrieBranch branch = (TrieBranch)node;
- if (branch.term != null) {
- writeTrieNode(branch.term, index, data, locationMap);
- }
- for (TrieNode child : branch.children) {
- writeTrieNode(child, index, data, locationMap);
- }
-
- locationMap.put(branch, (short)index.position());
-
- index.put((branch.term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
- index.put((byte)0xFF); // Padding to keep things short-aligned.
- index.putInt(branch.offset);
- if (branch.term != null) {
- index.putShort(locationMap.get(branch.term));
- }
- branch.children.writeHeader(index);
- for (TrieNode child : branch.children) {
- index.putShort(locationMap.get(child));
- }
- }
- }
-
- private static class TrieLeafWriter implements TrieNodeWriter {
- public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
- TrieLeaf leaf = (TrieLeaf)node;
-
- ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
- long keyLocation = data.position();
- dataBuf.putLong(leaf.value);
- dataBuf.putInt(leaf.key.length);
- dataBuf.put(leaf.key);
- data.write((ByteBuffer)dataBuf.flip());
-
- locationMap.put(leaf, (short)index.position());
- index.put(TYPE_LEAF);
- index.put((byte)0xFF); // Pad to 16-bit aligned.
- index.putLong(keyLocation);
- }
- }
-
- private TrieBranch readTrieBranch(ByteBuffer index, boolean hasTerm, Map<Short, TrieNode> nodeMap) throws IOException {
- TrieBranch branch = new TrieBranch();
-
- index.get(); // skip padding.
- branch.offset = index.getInt();
- if (hasTerm) {
- branch.term = (TrieLeaf)nodeMap.get(index.getShort());
- }
- branch.children = new ByteMap<TrieNode>(index, nodeMap);
- if (branch.term == null) {
- // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
- // generics which triggers the obvious ClassCastException. By extracting explicitly to an Object[]
- // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
- // insert the erroneous cast.
- Object[] d = branch.children.data;
- branch.aLeaf = ((TrieNode)(d[0])).aLeaf;
- } else {
- branch.aLeaf = branch.term;
- }
-
- return branch;
- }
-
- private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
- index.get();
- long keyLocation = index.getLong();
- data.position(keyLocation);
- // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
- ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
- data.read(bb);
- bb.flip();
- long value = bb.getLong();
- int length = bb.getInt();
- byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
- data.read(ByteBuffer.wrap(key));
-
- return new TrieLeaf(key, value, false);
- }
-
- private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
- static {
- sizerMap.put(TrieBranch.class, new TrieBranchSizer());
- sizerMap.put(TrieLeaf.class, new TrieLeafSizer());
- }
-
- protected static int totalIndexSize(TrieNode node) {
- return sizerMap.get(node.getClass()).indexSize(node);
- }
-
- protected static int totalDataSize(TrieNode node) {
- return sizerMap.get(node.getClass()).dataSize(node);
- }
-
- private static interface TrieNodeSizer {
- public int indexSize(TrieNode node);
- public int dataSize(TrieNode node);
- }
-
- private static class TrieBranchSizer implements TrieNodeSizer {
- protected int memSize(TrieBranch branch) {
- return 4 + 1 + 1 + branch.children.memSize() + (branch.term == null ? 0 : 2);
- }
-
- public int indexSize(TrieNode node) {
- TrieBranch branch = (TrieBranch)node;
- int total = memSize(branch);
- if (branch.term != null) {
- total += totalIndexSize(branch.term);
- }
- for (TrieNode child : branch.children) {
- total += totalIndexSize(child);
- }
-
- return total;
- }
-
- public int dataSize(TrieNode node) {
- TrieBranch branch = (TrieBranch)node;
- int total = 0;
- if (branch.term != null) {
- total += totalDataSize(branch.term);
- }
- for (TrieNode child : branch.children) {
- total += totalDataSize(child);
- }
-
- return total;
- }
- }
-
- private static class TrieLeafSizer implements TrieNodeSizer {
- public int indexSize(TrieNode node) {
- return 1 + 1 + 8;
- }
-
- public int dataSize(TrieNode node) {
- TrieLeaf leaf = (TrieLeaf)node;
- return 8 + 4 + leaf.key.length;
- }
- }
-}
Copied: projects/xa2/object-pool/src/trie/CompBlockTrie.java.bak (from rev 915, projects/xa2/object-pool/src/trie/CompBlockTrie.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrie.java.bak (rev 0)
+++ projects/xa2/object-pool/src/trie/CompBlockTrie.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,342 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 22nd April 2008
+ */
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Extends CompMemTrie to provide IO functionality.
+ *
+ * Guarantees Trie will fit within a single block, refuses to accept insert() if it would make the
+ * serialized size of the node greater than the blocksize.
+ */
+public class CompBlockTrie extends CompMemTrie {
+ @SuppressWarnings("serial")
+ public static class InsufficientSpace extends Exception
+ { public InsufficientSpace(String s) { super(s); } };
+
+ static final byte TYPE_BRANCH_TERM = 0x01; // Indicates a trie branch with a termination entry..
+ static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
+ static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
+ static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
+ public final static int MAGIC = 0x00020001;
+ public final static int INVERT_MAGIC = 0x01000200;
+
+ // Calculated as the worst case size per entry.
+ // Assumes a full leaf and branch per entry.
+ // Block: 2-byte type-header
+ // 4-byte offset
+ // 2-byte ByteMap-High
+ // 4-byte ByteMap-Lows (2-entries).
+ // 4-byte Children Ptrs.
+ // Leaf: 2-byte type-header
+ // 8-byte offset.
+ // 26-bytes Total.
+ // Best case of course is 10-bytes leaf + 2-bytes children in branch.
+ private static short WORST_CASE_ENTRY_SIZE = 26;
+//private static short BEST_CASE_ENTRY_SIZE = 12;
+
+ /** Worst-case space left in trie measured in entries */
+ private short space;
+
+ /**
+ * The blocksize for this Block-Trie.
+ * Currently the maximum block size supported is 32K, which is worst-case:
+ * 1260 entries covered by a single I0 index in 32KB.
+ * 1.59 million entries by a single I1 index in 40MB.
+ * 2.00 billion entries by a single I2 index in 49GB.
+ * 2.52 trillion entries by a single I3 index in 60TB.
+ */
+ private int blockSize;
+
+ public CompBlockTrie(int blockSize) {
+ super();
+ assert blockSize > 1024;
+ assert blockSize <= 32*1024;
+ this.blockSize = blockSize;
+ this.space = (short)((blockSize - 8) / WORST_CASE_ENTRY_SIZE); // -8 leaves room for header info.
+ }
+
+ public CompBlockTrie(ByteBuffer index, FileChannel data) throws IOException {
+ super();
+ index.rewind(); // Note sure if I should doing this here or delegating to the caller.
+ int magic = index.getInt();
+ if (magic != MAGIC) {
+ if (magic == INVERT_MAGIC) {
+ index.order(index.order().equals(ByteOrder.BIG_ENDIAN) ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN);
+ } else {
+ throw new IllegalArgumentException("Bad magic in index buffer: " + magic + ", MAGIC=" + MAGIC);
+ }
+ }
+
+ // I should be able to replace this with a stack.
+ // The child->parent relationship is implicit in the ordering of the nodes in the file.
+ // The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
+ // If treated as a stack once we have read the root the stack should contain only a single element - the
+ // root node.
+ // For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
+ HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
+
+ short rootLocation = -1;
+ while (rootLocation == -1) {
+ short location = (short)index.position();
+ byte type = index.get();
+ switch (type) {
+ case TYPE_BRANCH_TERM:
+ nodeMap.put(location, readTrieBranch(index, true, nodeMap));
+ break;
+ case TYPE_BRANCH_NOTERM:
+ nodeMap.put(location, readTrieBranch(index, false, nodeMap));
+ break;
+ case TYPE_LEAF:
+ nodeMap.put(location, readTrieLeaf(index, data));
+ break;
+ case TYPE_ROOT_LOC:
+ index.get();
+ rootLocation = index.getShort();
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid trie-node");
+ }
+ }
+
+ root = nodeMap.get(rootLocation);
+
+ this.space = 0; // If this gets modified it will be recalculated automaticly then.
+ }
+
+ public boolean insert(byte[] key, long value) {
+ // Space was calculated on the basis of worst-case - if we were worst case we have now filled the
+ // trie-block, but if we haven't we should recalculate the space available.
+ if (space == 0) {
+ int size = totalIndexSize(root);
+ space = (short)((blockSize - size - 8) / WORST_CASE_ENTRY_SIZE);
+ if (space < 0) {
+ throw new IllegalStateException("Overfilled CompBlockTrie");
+ } else if (space == 0) {
+ return false;
+ }
+ }
+
+ super.insert(key, value);
+ space--;
+
+ return true;
+ }
+
+ public void write(ByteBuffer index, FileChannel data) throws InsufficientSpace, IOException {
+ if (index.remaining() < blockSize) {
+ throw new InsufficientSpace("Insufficient space remaining in buffer to write block");
+ }
+
+ // Temporarally set the limit to blocksize.
+ int limit = index.limit();
+ index.limit(index.position() + blockSize);
+
+ if (root == null) {
+ throw new IllegalStateException("Attempt to write empty trie");
+ }
+
+ int indexreq = (root == null) ? 8 : totalIndexSize(root) + 4 + 4; // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
+ if (indexreq > index.remaining()) {
+ System.err.printf("Index-Req:%d ; remaining:%d ; capacity:%d ; limit:%d ; position:%d\n", indexreq,
+ index.remaining(), index.capacity(), index.limit(), index.position());
+ throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
+ }
+ if (indexreq > 0x00010000) {
+ throw new InsufficientSpace("Attempt to write trie index larger than 64K");
+ }
+
+ HashMap<TrieNode, Short> locationMap = new HashMap<TrieNode, Short>();
+
+ index.putInt(MAGIC);
+ if (root != null) {
+ writeTrieNode(root, index, data, locationMap);
+ }
+ index.put(TYPE_ROOT_LOC);
+ index.put((byte)0xFF);
+ if (root != null) {
+ index.putShort(locationMap.get(root));
+ } else {
+ index.putShort((short)0x0000);
+ }
+
+ // Set the position to the block size to ensure whole blocks are written.
+ index.position(index.limit());
+ // Reset the limit to its initial value.
+ index.limit(limit);
+ }
+
+ private static Map<Class, TrieNodeWriter> writerMap = new HashMap<Class, TrieNodeWriter>();
+ static {
+ writerMap.put(TrieBranch.class, new TrieBranchWriter());
+ writerMap.put(TrieLeaf.class, new TrieLeafWriter());
+ }
+
+ protected static void writeTrieNode(TrieNode node, ByteBuffer index, FileChannel data,
+ Map<TrieNode, Short> locationMap) throws IOException {
+ writerMap.get(node.getClass()).write(node, index, data, locationMap);
+ }
+
+ private static interface TrieNodeWriter {
+ /**
+ * This method *must* update location with the position in the index buffer it was written to.
+ * This allows parents to obtain a physical reference to this node in their write methods.
+ * The index buffer is the target of the Trie index itself.
+ * The data buffer is the target of the byte[]/value pairs stored in the leaves. We want to keep these
+ * separate for caching purposes.
+ */
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException;
+ }
+
+ private static class TrieBranchWriter implements TrieNodeWriter {
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
+ TrieBranch branch = (TrieBranch)node;
+ if (branch.term != null) {
+ writeTrieNode(branch.term, index, data, locationMap);
+ }
+ for (TrieNode child : branch.children) {
+ writeTrieNode(child, index, data, locationMap);
+ }
+
+ locationMap.put(branch, (short)index.position());
+
+ index.put((branch.term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
+ index.put((byte)0xFF); // Padding to keep things short-aligned.
+ index.putInt(branch.offset);
+ if (branch.term != null) {
+ index.putShort(locationMap.get(branch.term));
+ }
+ branch.children.writeHeader(index);
+ for (TrieNode child : branch.children) {
+ index.putShort(locationMap.get(child));
+ }
+ }
+ }
+
+ private static class TrieLeafWriter implements TrieNodeWriter {
+ public void write(TrieNode node, ByteBuffer index, FileChannel data, Map<TrieNode, Short> locationMap) throws IOException {
+ TrieLeaf leaf = (TrieLeaf)node;
+
+ ByteBuffer dataBuf = ByteBuffer.allocateDirect(8 + 4 + leaf.key.length);
+ long keyLocation = data.position();
+ dataBuf.putLong(leaf.value);
+ dataBuf.putInt(leaf.key.length);
+ dataBuf.put(leaf.key);
+ data.write((ByteBuffer)dataBuf.flip());
+
+ locationMap.put(leaf, (short)index.position());
+ index.put(TYPE_LEAF);
+ index.put((byte)0xFF); // Pad to 16-bit aligned.
+ index.putLong(keyLocation);
+ }
+ }
+
+ private TrieBranch readTrieBranch(ByteBuffer index, boolean hasTerm, Map<Short, TrieNode> nodeMap) throws IOException {
+ TrieBranch branch = new TrieBranch();
+
+ index.get(); // skip padding.
+ branch.offset = index.getInt();
+ if (hasTerm) {
+ branch.term = (TrieLeaf)nodeMap.get(index.getShort());
+ }
+ branch.children = new ByteMap<TrieNode>(index, nodeMap);
+ if (branch.term == null) {
+ // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
+ // generics which triggers the obvious ClassCastException. By extracting explicitly to an Object[]
+ // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
+ // insert the erroneous cast.
+ Object[] d = branch.children.data;
+ branch.aLeaf = ((TrieNode)(d[0])).aLeaf;
+ } else {
+ branch.aLeaf = branch.term;
+ }
+
+ return branch;
+ }
+
+ private TrieLeaf readTrieLeaf(ByteBuffer index, FileChannel data) throws IOException {
+ index.get();
+ long keyLocation = index.getLong();
+ data.position(keyLocation);
+ // FIXME: I need to cache these to reduce the allocation cost which is showing up under profiling
+ ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
+ data.read(bb);
+ bb.flip();
+ long value = bb.getLong();
+ int length = bb.getInt();
+ byte[] key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
+ data.read(ByteBuffer.wrap(key));
+
+ return new TrieLeaf(key, value, false);
+ }
+
+ private static Map<Class, TrieNodeSizer> sizerMap = new HashMap<Class, TrieNodeSizer>();
+ static {
+ sizerMap.put(TrieBranch.class, new TrieBranchSizer());
+ sizerMap.put(TrieLeaf.class, new TrieLeafSizer());
+ }
+
+ protected static int totalIndexSize(TrieNode node) {
+ return sizerMap.get(node.getClass()).indexSize(node);
+ }
+
+ protected static int totalDataSize(TrieNode node) {
+ return sizerMap.get(node.getClass()).dataSize(node);
+ }
+
+ private static interface TrieNodeSizer {
+ public int indexSize(TrieNode node);
+ public int dataSize(TrieNode node);
+ }
+
+ private static class TrieBranchSizer implements TrieNodeSizer {
+ protected int memSize(TrieBranch branch) {
+ return 4 + 1 + 1 + branch.children.memSize() + (branch.term == null ? 0 : 2);
+ }
+
+ public int indexSize(TrieNode node) {
+ TrieBranch branch = (TrieBranch)node;
+ int total = memSize(branch);
+ if (branch.term != null) {
+ total += totalIndexSize(branch.term);
+ }
+ for (TrieNode child : branch.children) {
+ total += totalIndexSize(child);
+ }
+
+ return total;
+ }
+
+ public int dataSize(TrieNode node) {
+ TrieBranch branch = (TrieBranch)node;
+ int total = 0;
+ if (branch.term != null) {
+ total += totalDataSize(branch.term);
+ }
+ for (TrieNode child : branch.children) {
+ total += totalDataSize(child);
+ }
+
+ return total;
+ }
+ }
+
+ private static class TrieLeafSizer implements TrieNodeSizer {
+ public int indexSize(TrieNode node) {
+ return 1 + 1 + 8;
+ }
+
+ public int dataSize(TrieNode node) {
+ TrieLeaf leaf = (TrieLeaf)node;
+ return 8 + 4 + leaf.key.length;
+ }
+ }
+}
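
[Editor's note] For reference, the capacity figures quoted in the blockSize comment above follow directly from the 26-byte worst-case entry and the 8-byte block header (plain arithmetic, nothing assumed beyond those two constants):

    (32*1024 - 8) / 26 = 1260 entries per 32KB I0 block
    1260^2 = 1,587,600 (~1.59 million) entries under one I1 index, in ~1260 * 32KB ~= 40MB
    1260^3 = 2,000,376,000 (~2.00 billion) entries, in ~1260^2 * 32KB ~= 49GB
    1260^4 ~= 2.52 trillion entries, in ~1260^3 * 32KB ~= 60TB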
Deleted: projects/xa2/object-pool/src/trie/CompBlockTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrieTest.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/CompBlockTrieTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,462 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-/**
- * Basic tests of the CompBlockTrie.
- */
-public class CompBlockTrieTest {
- public static void main(String[] args) throws Exception {
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
- testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
- testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
- testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
-// testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
- private static final int BLOCK_SIZE = 32 * 1024;
- public static void testWithLimitedFile(File file, int limit) throws Exception {
- Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
-
- FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
- FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
- ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
-
- ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
- CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
-
- System.out.println("Inserting " + limit + " lines from " + file);
-
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0xDEADBEEF00000000L;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- for (int i = 0; i < limit; i++) {
- // insert() reports a full block by returning false rather than throwing, so a
- // failed name is retried against a fresh trie on the next iteration instead of being lost.
- if (name == null) {
- break;
- }
- if (trie.insert(name.getBytes(), n)) {
- // The insert succeeded, so only now advance to the next name; on failure name is
- // left unchanged and is retried below with a new trie.
- namesMap.put(name.getBytes(), n);
- name = names.readLine();
- n++;
- } else {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- }
- }
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
-
- indexFile.force(true);
- dataFile.force(true);
- indexFile.close();
- dataFile.close();
-
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- indexFile = new FileInputStream("tmp/sp_idx").getChannel();
- dataFile = new FileInputStream("tmp/sp_dat").getChannel();
- indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
- ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
-
- long _startRead = System.currentTimeMillis();
-
- while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
- indexBuffer.flip();
- CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
- readTries.add(ctrie);
- }
-
- long _endRead = System.currentTimeMillis();
-
- System.out.println("Checking lines from " + file);
- long _startLookup = System.currentTimeMillis();
-
- boolean[] valid = new boolean[1];
-
- for (byte[] key : namesMap.keySet()) {
- boolean found = false;
- for (CompBlockTrie t : tries) {
- long value = t.lookup(key, valid);
- if (valid[0] && value == namesMap.get(key)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- found = false;
- for (CompBlockTrie t : readTries) {
- long value = t.lookup(key, valid);
- if (valid[0] && value == namesMap.get(key)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Read-Trie doesn't match Map");
- }
- }
-
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) +
- " read:" + (_endRead - _startRead) +
- " lookup:" + (_endLookup - _startLookup));
- }
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public byte[] getBytes() {
- return bytes;
- }
-
- public boolean equals(Object o) {
- if (o instanceof Bytes) {
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
- return hc;
- }
-
- public String toString() {
- return Arrays.toString(bytes);
- }
- }
-
- public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
-
- System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
- FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
- ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
-
- ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
- CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < max) {
- if (buffer.remaining() < 67*1024) {
- if (chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
- }
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
-
- if (!trie.insert(key, n)) {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- trie.insert(key, n);
- }
-
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
- }
-
- for (int i = 0; i < small; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
-
- if (!trie.insert(key, n)) {
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
- trie = new CompBlockTrie(BLOCK_SIZE);
- trie.insert(key, n);
- }
-
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
- }
- }
- }
-
- trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write((ByteBuffer)indexBuffer.flip());
- tries.add(trie);
-
- indexFile.force(true);
- dataFile.force(true);
- indexFile.close();
- dataFile.close();
-
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- indexFile = new FileInputStream("tmp/sp_idx").getChannel();
- dataFile = new FileInputStream("tmp/sp_dat").getChannel();
- indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
- ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
-
- long _startRead = System.currentTimeMillis();
-
- while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
- indexBuffer.flip();
- CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
- readTries.add(ctrie);
- }
-
- long _endRead = System.currentTimeMillis();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
-
- boolean[] valid = new boolean[1];
-
- int i = 0;
- for (Bytes keyBytes : namesMap.keySet()) {
- boolean found = false;
- for (CompBlockTrie t : readTries) {
- long value = t.lookup(keyBytes.getBytes(), valid);
- if (valid[0] && value == namesMap.get(keyBytes)) {
- found = true;
- break;
- }
- }
- if (!found) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i
- + " key:" + keyBytes + " value: " + namesMap.get(keyBytes));
- }
- i++;
- }
-
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) +
- " read:" + (_endRead - _startRead) +
- " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 100; i++) {
- if (buffer.remaining() < 2*1024*1024) {
- if (chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
- }
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int k = 0; k < 10; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int ii = 0; ii < 1000; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateSS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
- System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- int i = 0;
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
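
The Bytes wrapper above exists because byte[] inherits identity-based equals() and hashCode() from Object, so two arrays with identical contents are different HashMap keys; the HashMap<byte[], Long> in testWithLimitedFile only gets away without it because it iterates its own keySet(). A minimal self-contained demonstration, with a stripped-down copy of the wrapper:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class ByteKeySketch {
  // Stripped-down equivalent of the Bytes wrapper in the test above.
  static class Bytes {
    final byte[] bytes;
    Bytes(byte[] b) { bytes = b.clone(); }
    public boolean equals(Object o) { return o instanceof Bytes && Arrays.equals(bytes, ((Bytes)o).bytes); }
    public int hashCode() { return Arrays.hashCode(bytes); }
  }

  public static void main(String[] args) {
    Map<byte[], Long> raw = new HashMap<byte[], Long>();
    raw.put("cat".getBytes(), 1L);
    System.out.println(raw.get("cat".getBytes())); // null: a different array instance

    Map<Bytes, Long> wrapped = new HashMap<Bytes, Long>();
    wrapped.put(new Bytes("cat".getBytes()), 1L);
    System.out.println(wrapped.get(new Bytes("cat".getBytes()))); // 1: content equality
  }
}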
Copied: projects/xa2/object-pool/src/trie/CompBlockTrieTest.java.bak (from rev 915, projects/xa2/object-pool/src/trie/CompBlockTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompBlockTrieTest.java.bak (rev 0)
+++ projects/xa2/object-pool/src/trie/CompBlockTrieTest.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,462 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.RandomAccessFile;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the CompBlockTrie.
+ */
+public class CompBlockTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ private static final int BLOCK_SIZE = 32 * 1024;
+ public static void testWithLimitedFile(File file, int limit) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+
+ FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+ FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
+ ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+
+ ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
+ CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
+
+ System.out.println("Inserting " + limit + " lines from " + file);
+
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0xDEADBEEF00000000L;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ for (int i = 0; i < limit; i++) {
+ // insert() reports a full block by returning false rather than throwing, so a
+ // failed name is retried against a fresh trie on the next iteration instead of being lost.
+ if (name == null) {
+ break;
+ }
+ if (trie.insert(name.getBytes(), n)) {
+ // The insert succeeded, so only now advance to the next name; on failure name is
+ // left unchanged and is retried below with a new trie.
+ namesMap.put(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ } else {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ }
+ }
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+
+ indexFile.force(true);
+ dataFile.force(true);
+ indexFile.close();
+ dataFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+ dataFile = new FileInputStream("tmp/sp_dat").getChannel();
+ indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+ ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
+
+ long _startRead = System.currentTimeMillis();
+
+ while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
+ indexBuffer.flip();
+ CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
+ readTries.add(ctrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ System.out.println("Checking lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+
+ boolean[] valid = new boolean[1];
+
+ for (byte[] key : namesMap.keySet()) {
+ boolean found = false;
+ for (CompBlockTrie t : tries) {
+ long value = t.lookup(key, valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ found = false;
+ for (CompBlockTrie t : readTries) {
+ long value = t.lookup(key, valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Read-Trie doesn't match Map");
+ }
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+
+ public String toString() {
+ return Arrays.toString(bytes);
+ }
+ }
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+ FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
+ ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+
+ ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
+ CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024) {
+ if (chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
+ }
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+
+ if (!trie.insert(key, n)) {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ trie.insert(key, n);
+ }
+
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+
+ if (!trie.insert(key, n)) {
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+ trie = new CompBlockTrie(BLOCK_SIZE);
+ trie.insert(key, n);
+ }
+
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+ }
+ }
+ }
+
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
+
+ indexFile.force(true);
+ dataFile.force(true);
+ indexFile.close();
+ dataFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+ dataFile = new FileInputStream("tmp/sp_dat").getChannel();
+ indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
+ ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
+
+ long _startRead = System.currentTimeMillis();
+
+ while (indexFile.read((ByteBuffer)indexBuffer.clear()) != -1) {
+ indexBuffer.flip();
+ CompBlockTrie ctrie = new CompBlockTrie(indexBuffer, dataFile);
+ readTries.add(ctrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+
+ boolean[] valid = new boolean[1];
+
+ int i = 0;
+ for (Bytes keyBytes : namesMap.keySet()) {
+ boolean found = false;
+ for (CompBlockTrie t : readTries) {
+ long value = t.lookup(keyBytes.getBytes(), valid);
+ if (valid[0] && value == namesMap.get(keyBytes)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i
+ + " key:" + keyBytes + " value: " + namesMap.get(keyBytes));
+ }
+ i++;
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024) {
+ if (chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
+ }
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
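
Both revisions of this test rely on the same flush-and-retry idiom: CompBlockTrie.insert() signals a full 32KB block by returning false rather than throwing, so the caller writes out the finished block, starts a fresh trie, and retries the same key. The idiom distilled into a self-contained sketch (StubTrie stands in for CompBlockTrie; the capacity and keys are illustrative):

import java.util.ArrayList;
import java.util.List;

public class FlushRetrySketch {
  static class StubTrie {
    int used;
    final int capacity;
    StubTrie(int capacity) { this.capacity = capacity; }
    boolean insert(byte[] key, long value) {
      if (used + key.length > capacity) return false; // full: caller must flush
      used += key.length;
      return true;
    }
  }

  public static void main(String[] args) {
    List<StubTrie> flushed = new ArrayList<StubTrie>();
    StubTrie trie = new StubTrie(16);
    byte[][] keys = { "alpha".getBytes(), "beta".getBytes(), "gamma".getBytes(), "delta".getBytes() };
    for (byte[] key : keys) {
      if (!trie.insert(key, flushed.size())) {
        flushed.add(trie); // in the real test: trie.write(indexBuffer, dataFile)
        trie = new StubTrie(16);
        trie.insert(key, flushed.size()); // retry the same key in the fresh block
      }
    }
    flushed.add(trie); // the final, partially filled block still has to be flushed
    System.out.println("blocks written: " + flushed.size()); // 2
  }
}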
Deleted: projects/xa2/object-pool/src/trie/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrie.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/CompMemTrie.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,255 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 15th April 2008
- */
-
-import java.util.Arrays;
-
-/**
- * Implements an in-memory trie - uses ByteMaps to implement trie nodes.
- */
-public class CompMemTrie {
- @SuppressWarnings("serial")
- public static class NotFound extends Exception {}
- private static NotFound notfound = new NotFound();
-
- protected TrieNode root;
- public CompMemTrie() {
- this.root = null;
- }
-
- public boolean insert(byte[] key, long value) {
- if (root == null) {
- root = new TrieLeaf(key, value);
- } else {
- if (!root.insert(key, value)) {
- root = new TrieBranch(root, key, value);
- }
- }
-
- return true;
- }
-
- public long lookup(byte[] key) throws NotFound {
- boolean[] valid = new boolean[1];
- long result = lookup(key, valid);
- if (valid[0]) {
- return result;
- } else {
- throw notfound;
- }
- }
-
-
- public long lookup(byte[] key, boolean[] valid) {
- if (root == null) {
- valid[0] = false;
- return 0;
- } else {
- return root.lookup(key, valid);
- }
- }
-
- protected abstract static class TrieNode {
- protected TrieLeaf aLeaf;
-
- /**
- * @return false if we need to insert key above this node.
- */
- public boolean insert(byte[] key, long value) {
- return insert(new TrieLeaf(key, value), 0);
- }
-
- public long lookup(byte[] key, boolean[] valid) {
- return lookup(key, 0, valid);
- }
-
- protected abstract boolean insert(TrieLeaf node, int parentLcp);
- protected abstract long lookup(byte[] key, int parentLcp, boolean[] valid);
-
- protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
- if (lhsOff < 0 || rhsOff < 0) {
- return false;
- }
- if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
- return false;
- }
- for (int i = 0; i < len; i++) {
- if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
- }
-
- return true;
- }
- }
-
- protected static class TrieBranch extends TrieNode {
- protected int offset;
- protected ByteMap<TrieNode> children;
- protected TrieLeaf term;
-
- protected TrieBranch() {}
-
- public TrieBranch(byte[] key, long value) {
- this.children = new ByteMap<TrieNode>();
- this.term = new TrieLeaf(key, value);
- this.offset = term.key.length;
- this.aLeaf = this.term;
- }
-
- public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
- this(oldRoot, new TrieLeaf(key, value));
- }
-
- private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
- super();
- assert oldNode != null;
- assert newNode != null;
-
- offset = 0;
- children = new ByteMap<TrieNode>();
- term = null;
-
- // as every child node shares a common lcp, any child will suffice when preparing the new
- // lcp/offset of the new node.
- aLeaf = newNode;
-
- byte[] lhs = oldNode.aLeaf.key;
- byte[] rhs = newNode.key;
-
- int i = 0;
- while (i < lhs.length && i < rhs.length) {
- if (lhs[i] != rhs[i]) {
- offset = i;
- children.insert(lhs[i], oldNode);
- children.insert(rhs[i], newNode);
-
- return; // Escape here.
- }
- i++;
- }
-
- if (i < lhs.length) {
- offset = i;
- children.insert(lhs[i], oldNode);
- term = newNode;
- } else if (i < rhs.length) {
- if (oldNode instanceof TrieLeaf) {
- offset = i;
- children.insert(rhs[i], newNode);
- term = (TrieLeaf)oldNode;
- } else {
- throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
- }
- } else {
- throw new IllegalStateException("Attempt to create new branch node with equal children");
- }
- }
-
- protected boolean insert(TrieLeaf node, int parentLcp) {
- if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
- return false;
- } else {
- // new node matches the lcp of this node.
- if (node.key.length == offset) {
- // new node is expected to terminate here.
- if (term == null) {
- term = node;
- } else {
- return term.insert(node, offset);
- }
- } else {
- // new node is expected to terminate in one of this node's children.
- TrieNode child = children.lookup(node.key[offset]);
- if (child == null) {
- // this is the first node to be inserted on this branching key.
- children.insert(node.key[offset], node);
- } else {
- // there is an existing child node branching on this branching key.
- if (!child.insert(node, offset)) {
- children.insert(node.key[offset], new TrieBranch(child, node));
- }
- }
- }
-
- return true;
- }
- }
-
- protected long lookup(byte[] key, int parentLcp, boolean[] valid) {
- if (!regionMatches(aLeaf.key, parentLcp, key, parentLcp, offset - parentLcp)) {
- //FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
- valid[0] = false;
- return 0;
- } else {
- // new node matches the lcp of this node.
- TrieNode child;
- if (key.length == offset) {
- // new node is expected to terminate here.
- if (term != null) {
- valid[0] = true;
- return term.value;
- } else {
- valid[0] = false;
- return 0;
- }
- } else {
- // new node is expected to terminate in one of this node's children.
- child = children.lookup(key[offset]);
- if (child != null) {
- return child.lookup(key, offset, valid);
- } else {
- valid[0] = false;
- return 0;
- }
- }
- }
- }
-
- public String toString() {
- return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and aLeaf[" + aLeaf + "]";
- }
- }
-
- protected static class TrieLeaf extends TrieNode {
- final byte[] key;
- final long value;
-
- protected TrieLeaf(byte[] key, long value, boolean copyKey) {
- if (copyKey) {
- this.key = new byte[key.length];
- System.arraycopy(key, 0, this.key, 0, key.length);
- } else {
- this.key = key;
- }
- this.value = value;
- this.aLeaf = this;
- }
-
- public TrieLeaf(byte[] key, long value) {
- this(key, value, true);
- }
-
- protected boolean insert(TrieLeaf node, int parentLcp) {
- if (key.length != node.key.length) {
- return false;
- } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
- return false;
- } else if (value == node.value) {
- return true; // Duplicate key/value pair.
- } else {
- throw new IllegalArgumentException("Attempt to insert multiple values for same key");
- }
- }
-
- protected long lookup(byte[] key, int parentLcp, boolean[] valid) {
- assert Arrays.equals(this.key, key);
- valid[0] = true;
- return value;
- }
-
- public String toString() {
- return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
- }
- }
-}
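
A minimal usage sketch of the API above, exercising both lookup flavours; it assumes the CompMemTrie class shown in full above is on the classpath:

public class CompMemTrieUsage {
  public static void main(String[] args) throws CompMemTrie.NotFound {
    CompMemTrie trie = new CompMemTrie();
    trie.insert("hello".getBytes(), 42L);

    // Exception-based lookup: throws the shared NotFound instance on a miss.
    System.out.println(trie.lookup("hello".getBytes())); // 42

    // Out-parameter lookup: valid[0] reports whether the returned value is meaningful.
    boolean[] valid = new boolean[1];
    long v = trie.lookup("missing".getBytes(), valid);
    System.out.println(valid[0] + " / " + v); // false / 0
  }
}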
Copied: projects/xa2/object-pool/src/trie/CompMemTrie.java.bak (from rev 915, projects/xa2/object-pool/src/trie/CompMemTrie.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrie.java.bak (rev 0)
+++ projects/xa2/object-pool/src/trie/CompMemTrie.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,255 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 15th April 2008
+ */
+
+import java.util.Arrays;
+
+/**
+ * Implements an in-memory trie - uses ByteMaps to implement trie nodes.
+ */
+public class CompMemTrie {
+ @SuppressWarnings("serial")
+ public static class NotFound extends Exception {}
+ private static NotFound notfound = new NotFound();
+
+ protected TrieNode root;
+ public CompMemTrie() {
+ this.root = null;
+ }
+
+ public boolean insert(byte[] key, long value) {
+ if (root == null) {
+ root = new TrieLeaf(key, value);
+ } else {
+ if (!root.insert(key, value)) {
+ root = new TrieBranch(root, key, value);
+ }
+ }
+
+ return true;
+ }
+
+ public long lookup(byte[] key) throws NotFound {
+ boolean[] valid = new boolean[1];
+ long result = lookup(key, valid);
+ if (valid[0]) {
+ return result;
+ } else {
+ throw notfound;
+ }
+ }
+
+
+ public long lookup(byte[] key, boolean[] valid) {
+ if (root == null) {
+ valid[0] = false;
+ return 0;
+ } else {
+ return root.lookup(key, valid);
+ }
+ }
+
+ protected abstract static class TrieNode {
+ protected TrieLeaf aLeaf;
+
+ /**
+ * @return false if we need to insert key above this node.
+ */
+ public boolean insert(byte[] key, long value) {
+ return insert(new TrieLeaf(key, value), 0);
+ }
+
+ public long lookup(byte[] key, boolean[] valid) {
+ return lookup(key, 0, valid);
+ }
+
+ protected abstract boolean insert(TrieLeaf node, int parentLcp);
+ protected abstract long lookup(byte[] key, int parentLcp, boolean[] valid);
+
+ protected static boolean regionMatches(byte[] lhs, int lhsOff, byte[] rhs, int rhsOff, int len) {
+ if (lhsOff < 0 || rhsOff < 0) {
+ return false;
+ }
+ if (lhs.length < lhsOff + len || rhs.length < rhsOff + len) {
+ return false;
+ }
+ for (int i = 0; i < len; i++) {
+ if (lhs[lhsOff + i] != rhs[rhsOff + i]) return false;
+ }
+
+ return true;
+ }
+ }
+
+ protected static class TrieBranch extends TrieNode {
+ protected int offset;
+ protected ByteMap<TrieNode> children;
+ protected TrieLeaf term;
+
+ protected TrieBranch() {}
+
+ public TrieBranch(byte[] key, long value) {
+ this.children = new ByteMap<TrieNode>();
+ this.term = new TrieLeaf(key, value);
+ this.offset = term.key.length;
+ this.aLeaf = this.term;
+ }
+
+ public TrieBranch(TrieNode oldRoot, byte[] key, long value) {
+ this(oldRoot, new TrieLeaf(key, value));
+ }
+
+ private TrieBranch(TrieNode oldNode, TrieLeaf newNode) {
+ super();
+ assert oldNode != null;
+ assert newNode != null;
+
+ offset = 0;
+ children = new ByteMap<TrieNode>();
+ term = null;
+
+ // as every child node shares a common lcp, any child will suffice when preparing the new
+ // lcp/offset of the new node.
+ aLeaf = newNode;
+
+ byte[] lhs = oldNode.aLeaf.key;
+ byte[] rhs = newNode.key;
+
+ int i = 0;
+ while (i < lhs.length && i < rhs.length) {
+ if (lhs[i] != rhs[i]) {
+ offset = i;
+ children.insert(lhs[i], oldNode);
+ children.insert(rhs[i], newNode);
+
+ return; // Escape here.
+ }
+ i++;
+ }
+
+ if (i < lhs.length) {
+ offset = i;
+ children.insert(lhs[i], oldNode);
+ term = newNode;
+ } else if (i < rhs.length) {
+ if (oldNode instanceof TrieLeaf) {
+ offset = i;
+ children.insert(rhs[i], newNode);
+ term = (TrieLeaf)oldNode;
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with leaf > branch.");
+ }
+ } else {
+ throw new IllegalStateException("Attempt to create new branch node with equal children");
+ }
+ }
+
+ protected boolean insert(TrieLeaf node, int parentLcp) {
+ if (!regionMatches(aLeaf.key, parentLcp, node.key, parentLcp, offset - parentLcp)) {
+ return false;
+ } else {
+ // new node matches the lcp of this node.
+ if (node.key.length == offset) {
+ // new node is expected to terminate here.
+ if (term == null) {
+ term = node;
+ } else {
+ return term.insert(node, offset);
+ }
+ } else {
+ // new node is expected to terminate in one of this node's children.
+ TrieNode child = children.lookup(node.key[offset]);
+ if (child == null) {
+ // this is the first node to be inserted on this branching key.
+ children.insert(node.key[offset], node);
+ } else {
+ // there is an existing child node branching on this branching key.
+ if (!child.insert(node, offset)) {
+ children.insert(node.key[offset], new TrieBranch(child, node));
+ }
+ }
+ }
+
+ return true;
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcp, boolean[] valid) {
+ if (!regionMatches(aLeaf.key, parentLcp, key, parentLcp, offset - parentLcp)) {
+ //FIXME: I need to migrate this to the end of the search to avoid needing access to the key at each level.
+ valid[0] = false;
+ return 0;
+ } else {
+ // new node matches the lcp of this node.
+ TrieNode child;
+ if (key.length == offset) {
+ // new node is expected to terminate here.
+ if (term != null) {
+ valid[0] = true;
+ return term.value;
+ } else {
+ valid[0] = false;
+ return 0;
+ }
+ } else {
+ // new node is expected to terminate in one of this node's children.
+ child = children.lookup(key[offset]);
+ if (child != null) {
+ return child.lookup(key, offset, valid);
+ } else {
+ valid[0] = false;
+ return 0;
+ }
+ }
+ }
+ }
+
+ public String toString() {
+ return "Trie-BRANCH[" + (term != null) + " on " + offset + " with " + children.size() + " and aLeaf[" + aLeaf + "]";
+ }
+ }
+
+ protected static class TrieLeaf extends TrieNode {
+ final byte[] key;
+ final long value;
+
+ protected TrieLeaf(byte[] key, long value, boolean copyKey) {
+ if (copyKey) {
+ this.key = new byte[key.length];
+ System.arraycopy(key, 0, this.key, 0, key.length);
+ } else {
+ this.key = key;
+ }
+ this.value = value;
+ this.aLeaf = this;
+ }
+
+ public TrieLeaf(byte[] key, long value) {
+ this(key, value, true);
+ }
+
+ protected boolean insert(TrieLeaf node, int parentLcp) {
+ if (key.length != node.key.length) {
+ return false;
+ } else if (!regionMatches(key, parentLcp, node.key, parentLcp, key.length - parentLcp)) {
+ return false;
+ } else if (value == node.value) {
+ return true; // Duplicate key/value pair.
+ } else {
+ throw new IllegalArgumentException("Attempt to insert multiple values for same key");
+ }
+ }
+
+ protected long lookup(byte[] key, int parentLcp, boolean[] valid) {
+ assert Arrays.equals(this.key, key);
+ valid[0] = true;
+ return value;
+ }
+
+ public String toString() {
+ return "Trie-LEAF: " + Arrays.toString(key) + " -> " + value;
+ }
+ }
+}
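
To make the branch-splitting constructor concrete: inserting "cat" and then "car" walks the two keys until the first differing byte (index 2), then builds a TrieBranch with offset 2 whose ByteMap sends 't' and 'r' to the two leaves; term stays null because neither key ends at the split point. Since CompMemTrie sits in the default package, the protected root field is visible to a sketch in the same package:

public class BranchSplitSketch {
  public static void main(String[] args) {
    CompMemTrie trie = new CompMemTrie();
    trie.insert("cat".getBytes(), 1L);
    trie.insert("car".getBytes(), 2L);
    // The second insert fails at the leaf, so the root is replaced by a branch:
    System.out.println(trie.root); // Trie-BRANCH[false on 2 with 2 and aLeaf[...]]
  }
}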
Deleted: projects/xa2/object-pool/src/trie/CompMemTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrieTest.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/CompMemTrieTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,409 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th April 2008
- */
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-/**
- * Basic tests of the CompMemTrie.
- */
-public class CompMemTrieTest {
- public static void main(String[] args) throws Exception {
- testWithFile(new File("../scratch/propernames.uniq"));
- testWithFile(new File("../scratch/connectives.uniq"));
- testWithFile(new File("../scratch/web2a.uniq"));
- testWithFile(new File("../scratch/web2.uniq"));
- testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
- testWithRandomFile(new File("/dev/urandom"));
-// testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
- public static void testWithFile(File file) throws Exception {
- Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting lines from " + file);
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- while (name != null) {
- namesMap.put(name.getBytes(), n);
- namesTrie.insert(name.getBytes(), n);
- name = names.readLine();
- n++;
- }
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- System.out.println("Checking " + n + " lines from " + file);
- long _startLookup = System.currentTimeMillis();
- for (byte[] key : namesMap.keySet()) {
- if (namesTrie.lookup(key) != namesMap.get(key)) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public boolean equals(Object o) {
- if (o instanceof Bytes) {
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
- return hc;
- }
- }
-
- public static void testWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < 5000) {
- if (buffer.remaining() < 67*1024) {
- if (chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
- }
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < 10; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- int i = 0;
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < max) {
- if (buffer.remaining() < 67*1024) {
- if (chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
- }
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < small; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
-// namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- int i = 0;
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- CompMemTrie namesTrie = new CompMemTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 100; i++) {
- if (buffer.remaining() < 2*1024*1024) {
- if (chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip(); // flip only after a refill; flipping a drained buffer rewinds over consumed bytes
- }
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int k = 0; k < 10; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int ii = 0; ii < 1000; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (namesMap.containsKey(keyBytes)) {
- namesTrie.insert(key, namesMap.get(keyBytes));
- } else {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(key, n);
- _aggregateSS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
-
- if (namesTrie.lookup(key) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
- System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- int i = 0;
- for (Bytes k : namesMap.keySet()) {
- if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
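
All the random-input tests share one NIO refill idiom: the buffer is primed with clear().limit(0) so the very first compact() is a harmless no-op that leaves the whole buffer writable, and every later refill is a compact()/read()/flip() triple; flip() may only follow a fill, because flipping a partially drained buffer rewinds over bytes already consumed. A self-contained sketch of the idiom (the input path and one-byte length prefix are illustrative):

import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

public class CompactLoopSketch {
  public static void main(String[] args) throws Exception {
    FileChannel chan = new FileInputStream("/dev/urandom").getChannel();
    ByteBuffer buffer = ByteBuffer.allocate(4096);
    buffer.clear().limit(0); // empty: position == limit == 0, so the first compact() is a no-op

    for (int records = 0; records < 8; records++) {
      if (buffer.remaining() < 256) { // not enough buffered for a worst-case record
        if (chan.read(buffer.compact()) == -1) { // keep the unread tail, append fresh bytes
          break; // EOF
        }
        buffer.flip(); // back to draining mode
      }
      byte[] key = new byte[buffer.get() & 0xFF]; // length-prefixed record, as in the tests
      buffer.get(key);
      System.out.println("record of " + key.length + " bytes");
    }
    chan.close();
  }
}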
Copied: projects/xa2/object-pool/src/trie/CompMemTrieTest.java.bak (from rev 915, projects/xa2/object-pool/src/trie/CompMemTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/CompMemTrieTest.java.bak (rev 0)
+++ projects/xa2/object-pool/src/trie/CompMemTrieTest.java.bak 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,409 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * Basic tests of the CompMemTrie.
+ */
+public class CompMemTrieTest {
+ public static void main(String[] args) throws Exception {
+ testWithFile(new File("../scratch/propernames.uniq"));
+ testWithFile(new File("../scratch/connectives.uniq"));
+ testWithFile(new File("../scratch/web2a.uniq"));
+ testWithFile(new File("../scratch/web2.uniq"));
+ testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+ testWithRandomFile(new File("/dev/urandom"));
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ public static void testWithFile(File file) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ while (name != null) {
+ namesMap.put(name.getBytes(), n);
+ namesTrie.insert(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ }
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ System.out.println("Checking " + n + " lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+ for (byte[] key : namesMap.keySet()) {
+ if (namesTrie.lookup(key) != namesMap.get(key)) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+ }
+
+ public static void testWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < 5000) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < 10; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+// namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
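A note on the buffer handling these random-file tests share: the channel is drained through a single ByteBuffer using the compact()/flip() idiom, and the initial clear().limit(0) makes the first compact() behave like a plain fill of an empty buffer (without it, compact() would treat the whole uninitialised buffer as unread data). A condensed, self-contained sketch of the same refill loop follows; the input path is a placeholder, and it refills on every pass rather than only when the buffer runs low, which keeps the invariant simpler than in the test code above:

    import java.io.FileInputStream;
    import java.nio.ByteBuffer;
    import java.nio.channels.FileChannel;

    public class RefillSketch {
        public static void main(String[] args) throws Exception {
            FileChannel chan = new FileInputStream(args[0]).getChannel();
            ByteBuffer buf = ByteBuffer.allocate(134 * 1024);
            buf.clear().limit(0); // start "fully drained" so the first compact() is a plain fill

            boolean eof = false;
            while (!eof) {
                buf.compact();                // move any unread tail to the front
                eof = (chan.read(buf) == -1); // append fresh bytes after the tail
                buf.flip();                   // expose [0, position) for draining

                // Drain length-prefixed records; stop while a partial record
                // could remain, so compact() carries it into the next pass.
                while (buf.remaining() >= 2 + 0xFFFF) {
                    int len = ((int) buf.getShort()) & 0xFFFF;
                    byte[] key = new byte[len];
                    buf.get(key);
                }
            }
            chan.close();
        }
    }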
Modified: projects/xa2/object-pool/src/trie/DiskTrie.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrie.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/DiskTrie.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -29,8 +29,6 @@
static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry.
static final byte TYPE_LEAF = 0x03; // Indicates a trie leaf.
static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short locates the root node.
- public final static int MAGIC = 0x00020001;
- public final static int INVERT_MAGIC = 0x01000200;
// Calculated as the worst case size per entry.
// Assumes a full leaf and branch per entry.
@@ -64,8 +62,8 @@
// inline; 16MB-64MB for delete-marked bulk managed; and over that as indirected individual files.
public DiskTrie(Block indexBlock) {
super();
- if (indexBlock.getBlockSize() > 1024) throw new IllegalArgumentException("Attempt to use Block < 1024bytes in DiskTrie");
- if (indexBlock.getBlockSize() <= 32*1024) throw new IllegalArgumentException("Attempt to use Block >32KB in DiskTrie");
+ if (indexBlock.getBlockSize() < 1024) throw new IllegalArgumentException("Attempt to use Block < 1024 bytes in DiskTrie");
+ if (indexBlock.getBlockSize() > 32*1024) throw new IllegalArgumentException("Attempt to use Block > 32KB in DiskTrie");
if (indexBlock.getBlockId() != Block.UNALLOCATED) throw new IllegalArgumentException("Attempt to reuse block in DiskTrie");
this.indexBlock = indexBlock;
@@ -105,7 +103,7 @@
rootLocation = indexBlock.getShort();
break;
default:
- throw new IllegalArgumentException("Invalid trie-node");
+ throw new IllegalArgumentException("Invalid trie-node type=" + type);
}
}
@@ -141,7 +139,6 @@
HashMap<TrieNode, Short> locationMap = new HashMap<TrieNode, Short>();
- indexBlock.putInt(MAGIC);
if (root != null) {
writeTrieNode(root, indexBlock, data, locationMap);
}
@@ -152,6 +149,9 @@
} else {
indexBlock.putShort((short)0x0000);
}
+
+ // Pushes block to disk.
+ indexBlock.write();
}
private static Map<Class, TrieNodeWriter> writerMap = new HashMap<Class, TrieNodeWriter>();
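Two behavioural notes on the DiskTrie hunks above: the size guards were inverted before this revision (every legal block size threw), and echoing the offending tag in the default case turns "Invalid trie-node" into a diagnosable message when a block is corrupt. A minimal illustration of that tag-dispatch pattern (the tag values mirror the TYPE_* constants above; earlier tags are elided and the handlers are placeholders):

    static void readNode(byte type) {
        switch (type) {
            case 0x02: /* TYPE_BRANCH_NOTERM: parse a branch with no termination entry */ break;
            case 0x03: /* TYPE_LEAF: parse a leaf */ break;
            case 0x04: /* TYPE_ROOT_LOC: the following short locates the root */ break;
            default:
                // Reporting the unexpected tag pinpoints corrupt or
                // misversioned blocks instead of failing anonymously.
                throw new IllegalArgumentException("Invalid trie-node type=" + type);
        }
    }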
Added: projects/xa2/object-pool/src/trie/DiskTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/trie/DiskTrieTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/DiskTrieTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,447 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th April 2008
+ */
+package trie;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+import data.DataEntry;
+import index.IndexFile;
+import scheduler.FileHandleFactory;
+import scheduler.IOScheduler;
+import trie.DiskTrie;
+
+/**
+ * Basic tests of the DiskTrie.
+ */
+public class DiskTrieTest {
+ public static void main(String[] args) throws Exception {
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
+// testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
+// testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
+// testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
+// testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
+// testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
+ testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
+// testLoadWithRandomFile(new File("/dev/urandom"));
+ }
+
+ public static void testWithLimitedFile(File file, int limit) throws Exception {
+ Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
+
+ IOScheduler scheduler = new IOScheduler(new File("tmp"), new FileHandleFactory());
+
+ IndexFile indexFile = new IndexFile("sp." + file.getName() + limit, scheduler);
+
+ ArrayList<DiskTrie> tries = new ArrayList<DiskTrie>();
+ DiskTrie trie = indexFile.createEntry();
+
+ System.out.println("Inserting " + limit + " lines from " + file + " into index " + indexFile);
+
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0xDEADBEEF00000000L;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ for (int i = 0; i < limit; i++) {
+ if (name == null) {
+ break;
+ }
+ if (trie.insert(DataEntry.getEntry(name.getBytes(), n))) {
+ namesMap.put(name.getBytes(), n);
+ name = names.readLine();
+ n++;
+ } else {
+ indexFile.write(trie);
+ tries.add(trie);
+ trie = indexFile.createEntry();
+ }
+ }
+ indexFile.write(trie);
+ tries.add(trie);
+
+ indexFile.complete();
+ indexFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ indexFile = new IndexFile("sp." + file.getName() + limit, scheduler);
+
+ ArrayList<DiskTrie> readTries = new ArrayList<DiskTrie>();
+
+ System.out.println("Reading trie data for " + file + " from index " + indexFile);
+
+ long _startRead = System.currentTimeMillis();
+
+ for (DiskTrie dtrie = indexFile.readEntry(); dtrie != null; dtrie = indexFile.readEntry()) {
+ readTries.add(dtrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ System.out.println("Checking lines for " + file + " from index " + indexFile);
+ long _startLookup = System.currentTimeMillis();
+
+ boolean[] valid = new boolean[1];
+
+ for (byte[] key : namesMap.keySet()) {
+ boolean found = false;
+ for (DiskTrie t : tries) {
+ long value = t.lookup(DataEntry.getKey(key), valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ found = false;
+ for (DiskTrie t : readTries) {
+ long value = t.lookup(DataEntry.getKey(key), valid);
+ if (valid[0] && value == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Read-Trie doesn't match Map");
+ }
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+
+ public String toString() {
+ return Arrays.toString(bytes);
+ }
+ }
+
+ public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
+ System.out.println("Testing with Random File");
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ IOScheduler scheduler = new IOScheduler(new File("tmp"), new FileHandleFactory());
+ IndexFile indexFile = new IndexFile("sp." + file.getName() + max + "." + small, scheduler);
+ DiskTrie trie = indexFile.createEntry();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+
+ DataEntry entry = DataEntry.getEntry(key, n, true);
+ if (!trie.insert(entry)) {
+ indexFile.write(trie);
+ trie = indexFile.createEntry();
+ trie.insert(entry);
+ }
+
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+
+ DataEntry entry = DataEntry.getEntry(key, n, true);
+ if (!trie.insert(entry)) {
+ indexFile.write(trie);
+ trie = indexFile.createEntry();
+ trie.insert(entry);
+ }
+
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+ }
+ }
+ }
+
+ indexFile.write(trie);
+
+ indexFile.complete();
+ indexFile.close();
+
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ indexFile = new IndexFile("sp." + file.getName() + max + "." + small, scheduler);
+ ArrayList<DiskTrie> readTries = new ArrayList<DiskTrie>();
+ System.out.println("Reading trie data for " + file + " from index " + indexFile);
+ long _startRead = System.currentTimeMillis();
+
+ for (DiskTrie dtrie = indexFile.readEntry(); dtrie != null; dtrie = indexFile.readEntry()) {
+ readTries.add(dtrie);
+ }
+
+ long _endRead = System.currentTimeMillis();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+
+ boolean[] valid = new boolean[1];
+
+ int i = 0;
+ for (Bytes keyBytes : namesMap.keySet()) {
+ boolean found = false;
+ for (DiskTrie t : readTries) {
+ long value = t.lookup(DataEntry.getKey(keyBytes.getBytes()), valid);
+ if (valid[0] && value == namesMap.get(keyBytes)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i
+ + " key:" + keyBytes + " value: " + namesMap.get(keyBytes));
+ }
+ i++;
+ }
+
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
+ }
+
+/*
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ CompMemTrie namesTrie = new CompMemTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 100; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 10; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 1000; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (namesMap.containsKey(keyBytes)) {
+ namesTrie.insert(key, namesMap.get(keyBytes));
+ } else {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(key, n);
+ _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(key) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(k.bytes) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" + Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(k.bytes) + " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+*/
+}
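The pattern DiskTrieTest exercises throughout is spill-on-full: insert into the current DiskTrie until insert() returns false, flush that trie via IndexFile.write(), roll over to a fresh createEntry(), and retry the entry; lookups then probe every flushed trie through the two-value lookup(key, valid) protocol. A condensed sketch of that pattern, using the IndexFile/DiskTrie/DataEntry calls exactly as they appear above (error handling elided):

    import java.util.ArrayList;
    import java.util.List;

    import data.DataEntry;
    import index.IndexFile;
    import trie.DiskTrie;

    class SpillSketch {
        /** Insert everything, rolling to a new trie whenever the block fills. */
        static List<DiskTrie> insertAll(IndexFile indexFile, Iterable<DataEntry> entries) throws Exception {
            List<DiskTrie> tries = new ArrayList<DiskTrie>();
            DiskTrie current = indexFile.createEntry();
            for (DataEntry entry : entries) {
                if (!current.insert(entry)) {   // block full
                    indexFile.write(current);   // flush the full trie
                    tries.add(current);
                    current = indexFile.createEntry();
                    current.insert(entry);      // retry on the fresh trie
                }
            }
            indexFile.write(current);           // flush the final, partial trie
            tries.add(current);
            return tries;
        }

        /** Probe each flushed trie; valid[0] reports whether the key was present. */
        static Long lookupAll(List<DiskTrie> tries, byte[] key) throws Exception {
            boolean[] valid = new boolean[1];
            for (DiskTrie t : tries) {
                long value = t.lookup(DataEntry.getKey(key), valid);
                if (valid[0]) return value;
            }
            return null;                        // key absent from every trie
        }
    }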
Deleted: projects/xa2/object-pool/src/trie/MemoryTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrieTest.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/MemoryTrieTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -1,413 +0,0 @@
-/*
- * Copyright Topaz Foundation 2008
- * Author Andrae Muys
- * Date 9th May 2008
- */
-package trie;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileReader;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-import data.DataEntry;
-
-/**
- * Basic tests of the MemoryTrie
- */
-public class MemoryTrieTest {
- public static void main(String[] args) throws Exception {
- testWithFile(new File("../scratch/propernames.uniq"), 0);
- testWithFile(new File("../scratch/propernames.uniq"), 1);
- testWithFile(new File("../scratch/propernames.uniq"), 10);
- testWithFile(new File("../scratch/propernames.uniq"), 100);
- testWithFile(new File("../scratch/propernames.uniq"), 1000);
- testWithFile(new File("../scratch/propernames.uniq"), 10000);
- testWithFile(new File("../scratch/connectives.uniq"), 10000);
- testWithFile(new File("../scratch/web2a.uniq"), 100000);
- testWithFile(new File("../scratch/web2.uniq"), 1000000);
- testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
- testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
- testWithRandomFile(new File("/dev/urandom"));
- testLoadWithRandomFile(new File("/dev/urandom"));
- }
-
-
- public static void testWithFile(File file, int max) throws Exception {
- Set<DataEntry> namesSet = new HashSet<DataEntry>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting lines from " + file);
- BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0;
- String name = names.readLine();
- long _startInsert = System.currentTimeMillis();
- while (name != null && n < max) {
- DataEntry entry = DataEntry.getEntry(name.getBytes(), n);
- namesSet.add(entry);
- namesTrie.insert(entry);
- name = names.readLine();
- n++;
- }
- long _endInsert = System.currentTimeMillis();
- names.close();
-
- System.out.println("Checking " + n + " lines from " + file);
- long _startLookup = System.currentTimeMillis();
- for (DataEntry key : namesSet) {
- if (namesTrie.lookup(key) != key.getValue()) {
- throw new IllegalStateException("Trie doesn't match Map");
- }
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
-
- public static class Bytes {
- public final byte[] bytes;
-
- public Bytes(byte[] bytes) {
- this.bytes = new byte[bytes.length];
- System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
- }
-
- public boolean equals(Object o) {
- if (o instanceof Bytes) {
- return Arrays.equals(bytes, ((Bytes)o).bytes);
- } else {
- return false;
- }
- }
-
- public int hashCode() {
- int hc = Arrays.hashCode(bytes);
- return hc;
- }
- }
-
- public static void testWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < 5000) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < 10; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
- Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
- " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
-
- public static void testWithRandomFileTuned(File file, int max, int small) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(134*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nL = 0;
- int nS = 0;
-
- while (n < max) {
- if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _si = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateL += System.currentTimeMillis() - _si;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int i = 0; i < small; i++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.println(chan.position() + " bytes read from " + file);
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
- Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
- " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-
- public static void testLoadWithRandomFile(File file) throws Exception {
- Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
- MemoryTrie namesTrie = new MemoryTrie();
-
- System.out.println("Inserting random bytes from " + file);
- FileChannel chan = new FileInputStream(file).getChannel();
- ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
-
- long n = 0;
- byte[] key;
- // Set limit to 0 so initial compact works correctly.
- buffer.clear().limit(0);
- long _startInsert = System.currentTimeMillis();
- long _aggregateL = 0;
- long _avlL = 0;
- int nL = 0;
- long _aggregateLL = 0;
- long _avlLL = 0;
- int nLL = 0;
- long _aggregateS = 0;
- long _avlS = 0;
- int nS = 0;
- long _aggregateSS = 0;
- long _avlSS = 0;
- int nSS = 0;
-
- for (int i = 0; i < 200; i++) {
- if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
- break;
- }
- buffer.flip();
-
- key = new byte[buffer.getInt() & 0x0007FFFF];
- buffer.get(key);
- Bytes keyBytes = new Bytes(key);
-
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _sll = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateLL += System.currentTimeMillis() - _sll;
- _avlLL += key.length;
- nLL++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int j = 0; j < 10; j++) {
- key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _sl = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateL += System.currentTimeMillis() - _sl;
- _avlL += key.length;
- nL++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int k = 0; k < 20; k++) {
- key = new byte[((int)buffer.get()) & 0x000000FF];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _ss = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateS += System.currentTimeMillis() - _ss;
- _avlS += key.length;
- nS++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
-
- for (int ii = 0; ii < 50; ii++) {
- key = new byte[((int)buffer.get()) & 0x0000001F];
- buffer.get(key);
- keyBytes = new Bytes(key);
- if (!namesMap.containsKey(keyBytes)) {
- n++;
- namesMap.put(keyBytes, n);
- long _sss = System.currentTimeMillis();
- namesTrie.insert(DataEntry.getEntry(key, n));
- _aggregateSS += System.currentTimeMillis() - _sss;
- _avlSS += key.length;
- nSS++;
-
- if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
- throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
- }
- }
- }
- }
- }
- }
- long _endInsert = System.currentTimeMillis();
- System.out.println("Input " + namesMap.size() + " entries");
- System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
- System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
- System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
- System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
- nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
- chan.close();
-
- long _startLookup = System.currentTimeMillis();
- System.out.println("Checking random bytes from " + file);
- for (Bytes k : namesMap.keySet()) {
- int i = 0;
- if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
- throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
- Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
- " value': " + namesMap.get(k));
- }
- i++;
- }
- long _endLookup = System.currentTimeMillis();
-
- System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
- }
-}
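A note on the Bytes wrapper each of these test classes redeclares: Java arrays inherit identity-based equals() and hashCode() from Object, so a raw byte[] only matches itself as a HashMap key, while Bytes delegates to Arrays.equals()/Arrays.hashCode() and so keys the map on content. (The testWithFile variants get away with Map<byte[], Long> only because they look up the very same array references they pulled out of keySet().) A minimal demonstration:

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.Map;

    public class ByteKeyDemo {
        /** Same shape as the Bytes wrapper in the tests. */
        static final class Bytes {
            final byte[] bytes;
            Bytes(byte[] b) { this.bytes = b.clone(); } // defensive copy, as above
            public boolean equals(Object o) {
                return (o instanceof Bytes) && Arrays.equals(bytes, ((Bytes)o).bytes);
            }
            public int hashCode() { return Arrays.hashCode(bytes); }
        }

        public static void main(String[] args) {
            Map<byte[], Long> raw = new HashMap<byte[], Long>();
            raw.put(new byte[] { 1, 2, 3 }, 42L);
            // Identity semantics: an equal-content array is a different key.
            System.out.println(raw.get(new byte[] { 1, 2, 3 }));                // null

            Map<Bytes, Long> wrapped = new HashMap<Bytes, Long>();
            wrapped.put(new Bytes(new byte[] { 1, 2, 3 }), 42L);
            System.out.println(wrapped.get(new Bytes(new byte[] { 1, 2, 3 }))); // 42
        }
    }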
Copied: projects/xa2/object-pool/src/trie/MemoryTrieUnitTest.java (from rev 915, projects/xa2/object-pool/src/trie/MemoryTrieTest.java)
===================================================================
--- projects/xa2/object-pool/src/trie/MemoryTrieUnitTest.java (rev 0)
+++ projects/xa2/object-pool/src/trie/MemoryTrieUnitTest.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,445 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 9th May 2008
+ */
+package trie;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+import data.DataEntry;
+
+/**
+ * Basic tests of the MemoryTrie
+ */
+public class MemoryTrieUnitTest extends TestCase {
+ public static TestSuite suite() {
+ TestSuite suite = new TestSuite();
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 0));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 1));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 10));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 100));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 1000));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/propernames.uniq"), 10000));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/connectives.uniq"), 10000));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/web2a.uniq"), 100000));
+ suite.addTest(new MemoryTrieUnitTest(new File("scratch/web2.uniq"), 1000000));
+
+ suite.addTest(new MemoryTrieUnitTest(new File("bulk/random70M"), 12, 10));
+ suite.addTest(new MemoryTrieUnitTest(new File("bulk/random70M"), 250, 10));
+ suite.addTest(new MemoryTrieUnitTest(new File("bulk/random70M"), 5000, 10));
+
+ suite.addTest(new MemoryTrieUnitTest(new File("/dev/urandom")));
+ suite.addTest(new MemoryTrieUnitTest(new File("/dev/urandom")));
+
+ return suite;
+ }
+
+ private File file;
+ private int max;
+ private int small;
+
+ public MemoryTrieUnitTest(File file) {
+ super("testWithRandomFile");
+
+ this.file = file;
+ }
+
+ public MemoryTrieUnitTest(File file, int max) {
+ super("testWithFile");
+
+ this.file = file;
+ this.max = max;
+ }
+
+ public MemoryTrieUnitTest(File file, int max, int small) {
+ super("testWithRandomFileTuned");
+
+ this.file = file;
+ this.max = max;
+ this.small = small;
+ }
+
+ public void testWithFile() throws Exception {
+ Set<DataEntry> namesSet = new HashSet<DataEntry>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting lines from " + file);
+ BufferedReader names = new BufferedReader(new FileReader(file));
+ long n = 0;
+ String name = names.readLine();
+ long _startInsert = System.currentTimeMillis();
+ while (name != null && n < max) {
+ DataEntry entry = DataEntry.getEntry(name.getBytes(), n);
+ namesSet.add(entry);
+ namesTrie.insert(entry);
+ name = names.readLine();
+ n++;
+ }
+ long _endInsert = System.currentTimeMillis();
+ names.close();
+
+ System.out.println("Checking " + n + " lines from " + file);
+ long _startLookup = System.currentTimeMillis();
+ for (DataEntry key : namesSet) {
+ if (namesTrie.lookup(key) != key.getValue()) {
+ throw new IllegalStateException("Trie doesn't match Map");
+ }
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static class Bytes {
+ public final byte[] bytes;
+
+ public Bytes(byte[] bytes) {
+ this.bytes = new byte[bytes.length];
+ System.arraycopy(bytes, 0, this.bytes, 0, bytes.length);
+ }
+
+ public boolean equals(Object o) {
+ if (o instanceof Bytes) {
+ return Arrays.equals(bytes, ((Bytes)o).bytes);
+ } else {
+ return false;
+ }
+ }
+
+ public int hashCode() {
+ int hc = Arrays.hashCode(bytes);
+ return hc;
+ }
+ }
+
+ public void testWithRandomFile() throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < 5000) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < 10; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+
+ public void testWithRandomFileTuned() throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file + " to max records " + max + " with small " + small);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(134*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nL = 0;
+ int nS = 0;
+
+ while (n < max) {
+ if (buffer.remaining() < 67*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _si = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _si;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int i = 0; i < small; i++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.println(chan.position() + " bytes read from " + file);
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+
+ public static void testLoadWithRandomFile(File file) throws Exception {
+ Map<Bytes, Long> namesMap = new HashMap<Bytes, Long>();
+ MemoryTrie namesTrie = new MemoryTrie();
+
+ System.out.println("Inserting random bytes from " + file);
+ FileChannel chan = new FileInputStream(file).getChannel();
+ ByteBuffer buffer = ByteBuffer.allocate(8*1024*1024);
+
+ long n = 0;
+ byte[] key;
+ // Set limit to 0 so initial compact works correctly.
+ buffer.clear().limit(0);
+ long _startInsert = System.currentTimeMillis();
+ long _aggregateL = 0;
+ long _avlL = 0;
+ int nL = 0;
+ long _aggregateLL = 0;
+ long _avlLL = 0;
+ int nLL = 0;
+ long _aggregateS = 0;
+ long _avlS = 0;
+ int nS = 0;
+ long _aggregateSS = 0;
+ long _avlSS = 0;
+ int nSS = 0;
+
+ for (int i = 0; i < 200; i++) {
+ if (buffer.remaining() < 2*1024*1024 && chan.read(buffer.compact()) == -1) {
+ break;
+ }
+ buffer.flip();
+
+ key = new byte[buffer.getInt() & 0x0007FFFF];
+ buffer.get(key);
+ Bytes keyBytes = new Bytes(key);
+
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sll = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateLL += System.currentTimeMillis() - _sll;
+ _avlLL += key.length;
+ nLL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int j = 0; j < 10; j++) {
+ key = new byte[((int)buffer.getShort()) & 0x0000FFFF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sl = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateL += System.currentTimeMillis() - _sl;
+ _avlL += key.length;
+ nL++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int k = 0; k < 20; k++) {
+ key = new byte[((int)buffer.get()) & 0x000000FF];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _ss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateS += System.currentTimeMillis() - _ss;
+ _avlS += key.length;
+ nS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+
+ for (int ii = 0; ii < 50; ii++) {
+ key = new byte[((int)buffer.get()) & 0x0000001F];
+ buffer.get(key);
+ keyBytes = new Bytes(key);
+ if (!namesMap.containsKey(keyBytes)) {
+ n++;
+ namesMap.put(keyBytes, n);
+ long _sss = System.currentTimeMillis();
+ namesTrie.insert(DataEntry.getEntry(key, n));
+ _aggregateSS += System.currentTimeMillis() - _sss;
+ _avlSS += key.length;
+ nSS++;
+
+ if (namesTrie.lookup(DataEntry.getEntry(key, 0)) != n) {
+ throw new IllegalStateException("lookup failed key: " + Arrays.toString(key) + " value: " + n);
+ }
+ }
+ }
+ }
+ }
+ }
+ long _endInsert = System.currentTimeMillis();
+ System.out.println("Input " + namesMap.size() + " entries");
+ System.out.printf(" %d very long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nLL, (_avlLL / nLL), _aggregateLL, (_aggregateLL * 1000 / nLL), (_aggregateLL * 1000000 / _avlLL));
+ System.out.printf(" %d long entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nL, (_avlL / nL), _aggregateL, (_aggregateL * 1000 / nL), (_aggregateL * 1000000 / _avlL));
+ System.out.printf(" %d short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nS, (_avlS / nS), _aggregateS, (_aggregateS * 1000 / nS), (_aggregateS * 1000000 / _avlS));
+ System.out.printf(" %d very short entries ave: %d in: %dms; ave %dus per entry, %dns per byte\n",
+ nSS, (_avlSS / nSS), _aggregateSS, (_aggregateSS * 1000 / nSS), (_aggregateSS * 1000000 / _avlSS));
+ chan.close();
+
+ long _startLookup = System.currentTimeMillis();
+ System.out.println("Checking random bytes from " + file);
+ int i = 0;
+ for (Bytes k : namesMap.keySet()) {
+ if (namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) != namesMap.get(k)) {
+ throw new IllegalStateException("Trie doesn't match Map on entry: " + i + " key:" +
+ Arrays.toString(k.bytes) + " value: " + namesTrie.lookup(DataEntry.getEntry(k.bytes, 0)) +
+ " value': " + namesMap.get(k));
+ }
+ i++;
+ }
+ long _endLookup = System.currentTimeMillis();
+
+ System.out.println("Test Succeeded with " + file +
+ " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ }
+}
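MemoryTrieUnitTest uses the JUnit 3 idiom for parameterised tests: each constructor hands TestCase the name of the method to invoke reflectively, and suite() registers one fully-configured instance per data point, which is what lets the same three test bodies run against many file/limit combinations. A stripped-down skeleton of the pattern:

    import junit.framework.Test;
    import junit.framework.TestCase;
    import junit.framework.TestSuite;

    public class ParamTest extends TestCase {
        private final int max;

        public ParamTest(int max) {
            super("testWithMax"); // names the method JUnit 3 will invoke
            this.max = max;
        }

        public void testWithMax() {
            assertTrue("max must be non-negative", max >= 0);
        }

        public static Test suite() {
            TestSuite suite = new TestSuite();
            // One configured TestCase instance per data point.
            suite.addTest(new ParamTest(0));
            suite.addTest(new ParamTest(10));
            suite.addTest(new ParamTest(1000));
            return suite;
        }
    }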
Modified: projects/xa2/object-pool/src/trie/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/trie/OnesTable.java 2008-07-07 00:20:20 UTC (rev 1054)
+++ projects/xa2/object-pool/src/trie/OnesTable.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -46,7 +46,7 @@
static {
try {
- FileChannel fc = new FileInputStream("trie/OnesTable.dat").getChannel();
+ FileChannel fc = new FileInputStream("OnesTable.dat").getChannel();
shortOnesTable = new byte[64*1024];
fc.read(ByteBuffer.wrap(shortOnesTable));
onesTable = new byte[64*1024 * 2*256];
Added: projects/xa2/object-pool/src/trie/Pair.java
===================================================================
--- projects/xa2/object-pool/src/trie/Pair.java (rev 0)
+++ projects/xa2/object-pool/src/trie/Pair.java 2008-07-07 05:46:46 UTC (rev 1055)
@@ -0,0 +1,27 @@
+/*
+ * Copyright Topaz Foundation 2008
+ * Author Andrae Muys
+ * Date 20th June 2008
+ */
+package trie;
+
+/**
+ * Represents a pair of two elements.
+ */
+public class Pair<A,B> {
+ private final A head;
+ private final B tail;
+
+ public Pair(A head, B tail) {
+ this.head = head;
+ this.tail = tail;
+ }
+
+ public A head() {
+ return head;
+ }
+
+ public B tail() {
+ return tail;
+ }
+}
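Pair is a minimal immutable 2-tuple with cons-cell naming. A usage fragment (values invented for illustration):

    // head()/tail() simply return the two components.
    Pair<String, Long> counted = new Pair<String, Long>("web2.uniq", 235882L);
    System.out.println(counted.head() + ": " + counted.tail() + " lines");

Note that Pair inherits identity equals()/hashCode() from Object, so two Pairs with equal members are distinct as map keys; it is plumbing for multiple return values, not a key type.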