[Mulgara-svn] r844 - in projects/xa2/object-pool/src: . gen
andrae at mulgara.org
andrae at mulgara.org
Thu Apr 24 08:48:53 UTC 2008
Author: andrae
Date: 2008-04-24 01:48:52 -0700 (Thu, 24 Apr 2008)
New Revision: 844
Modified:
projects/xa2/object-pool/src/CompBlockTrie.java
projects/xa2/object-pool/src/CompBlockTrieTest.java
projects/xa2/object-pool/src/CompMemTrie.java
projects/xa2/object-pool/src/gen/OnesTable.java
Log:
Round-tripping now works most of the time - I have a block alignment bug I still need to fix - I expect with
that bug fixed I should be in a position to move on to the ISAM-tree component.
Modified: projects/xa2/object-pool/src/CompBlockTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrie.java 2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompBlockTrie.java 2008-04-24 08:48:52 UTC (rev 844)
@@ -7,14 +7,12 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
-import java.util.Arrays;
/**
* Extends CompMemTrie to guarantee the trie will fit within a single block.
*/
public class CompBlockTrie extends CompMemTrie {
@SuppressWarnings("serial")
- public static int MAGIC = 0x00010001;
// Calculated as the worst case size per entry.
// Assumes a full leaf and branch per entry.
// Block: 2-byte type-header
@@ -27,7 +25,7 @@
// 26-bytes Total.
// Best case of course is 10-bytes leaf + 2-bytes children in branch.
private static short WORST_CASE_ENTRY_SIZE = 26;
- private static short BEST_CASE_ENTRY_SIZE = 12;
+//private static short BEST_CASE_ENTRY_SIZE = 12;
/** Worst-case space left in trie measured in entries */
private short space;
Modified: projects/xa2/object-pool/src/CompBlockTrieTest.java
===================================================================
--- projects/xa2/object-pool/src/CompBlockTrieTest.java 2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompBlockTrieTest.java 2008-04-24 08:48:52 UTC (rev 844)
@@ -11,6 +11,7 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
@@ -21,10 +22,20 @@
*/
public class CompBlockTrieTest {
public static void main(String[] args) throws Exception {
- testWithFile(new File("../scratch/propernames.uniq"));
-// testWithFile(new File("../scratch/connectives.uniq"));
-// testWithFile(new File("../scratch/web2a.uniq"));
-// testWithFile(new File("../scratch/web2.uniq"));
+// testWithLimitedFile(new File("../scratch/propernames.uniq"), 1);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 2);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 3);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 4);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 5);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 6);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 7);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 8);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 9);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), 10);
+ testWithLimitedFile(new File("../scratch/propernames.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/connectives.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2a.uniq"), Integer.MAX_VALUE);
+ testWithLimitedFile(new File("../scratch/web2.uniq"), Integer.MAX_VALUE);
// testWithRandomFileTuned(new File("../bulk/random70M"), 12, 10);
// testWithRandomFileTuned(new File("../bulk/random70M"), 250, 10);
// testWithRandomFileTuned(new File("../bulk/random70M"), 5000, 10);
@@ -33,43 +44,45 @@
}
private static final int BLOCK_SIZE = 32 * 1024;
- public static void testWithFile(File file) throws Exception {
+ public static void testWithLimitedFile(File file, int limit) throws Exception {
Map<byte[], Long> namesMap = new HashMap<byte[], Long>();
- FileChannel indexFile = new RandomAccessFile("tmp/sp_idx", "rw").getChannel().truncate(0);
- FileChannel dataFile = new RandomAccessFile("tmp/sp_dat", "rw").getChannel().truncate(0);
+ FileChannel indexFile = new FileOutputStream("tmp/sp_idx").getChannel();
+ FileChannel dataFile = new FileOutputStream("tmp/sp_dat").getChannel();
ByteBuffer indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
ArrayList<CompBlockTrie> tries = new ArrayList<CompBlockTrie>();
CompBlockTrie trie = new CompBlockTrie(BLOCK_SIZE);
- tries.add(trie);
- System.out.println("Inserting lines from " + file);
+ System.out.println("Inserting " + limit + " lines from " + file);
BufferedReader names = new BufferedReader(new FileReader(file));
- long n = 0;
+ long n = 0xDEADBEEF00000000L;
String name = names.readLine();
long _startInsert = System.currentTimeMillis();
- for (;;) {
+ for (int i = 0; i < limit; i++) {
// The use of the double loop means we pay the cost of setting up the exception handler only once per
// block, not once per entry.
try {
- while (name != null) {
- trie.insert(name.getBytes(), n);
- // Note: exception thrown here if trie is full. So name will remain valid when we reenter loop with
- // a new trie.
- namesMap.put(name.getBytes(), n);
- name = names.readLine();
- n++;
+ if (name == null) {
+ break;
}
- break; // From for(;;)-loop.
+ trie.insert(name.getBytes(), n);
+ // Note: exception thrown here if trie is full. So name will remain valid when we reenter loop with
+ // a new trie.
+ namesMap.put(name.getBytes(), n);
+ name = names.readLine();
+ n++;
} catch (CompMemTrie.InsufficientSpace ec) {
trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
- indexFile.write(indexBuffer);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
trie = new CompBlockTrie(BLOCK_SIZE);
- tries.add(trie);
}
}
+ trie.write((ByteBuffer)indexBuffer.clear(), dataFile);
+ indexFile.write((ByteBuffer)indexBuffer.flip());
+ tries.add(trie);
indexFile.force(true);
dataFile.force(true);
@@ -79,8 +92,8 @@
long _endInsert = System.currentTimeMillis();
names.close();
- indexFile = new RandomAccessFile("tmp/sp_idx", "rw").getChannel().truncate(0);
- dataFile = new RandomAccessFile("tmp/sp_dat", "rw").getChannel().truncate(0);
+ indexFile = new FileInputStream("tmp/sp_idx").getChannel();
+ dataFile = new FileInputStream("tmp/sp_dat").getChannel();
indexBuffer = ByteBuffer.allocateDirect(BLOCK_SIZE);
ArrayList<CompBlockTrie> readTries = new ArrayList<CompBlockTrie>();
@@ -92,24 +105,32 @@
readTries.add(ctrie);
}
+ long _endRead = System.currentTimeMillis();
+
System.out.println("Checking lines from " + file);
long _startLookup = System.currentTimeMillis();
for (byte[] key : namesMap.keySet()) {
boolean found = false;
for (CompBlockTrie t : tries) {
- if (t.lookup(key) == namesMap.get(key)) {
- found = true;
- }
+ try {
+ if (t.lookup(key) == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ } catch (CompMemTrie.NotFound nf) {}
}
if (!found) {
throw new IllegalStateException("Trie doesn't match Map");
}
found = false;
for (CompBlockTrie t : readTries) {
- if (t.lookup(key) == namesMap.get(key)) {
- found = true;
- }
+ try {
+ if (t.lookup(key) == namesMap.get(key)) {
+ found = true;
+ break;
+ }
+ } catch (CompMemTrie.NotFound nf) {}
}
if (!found) {
throw new IllegalStateException("Read-Trie doesn't match Map");
@@ -119,7 +140,9 @@
long _endLookup = System.currentTimeMillis();
System.out.println("Test Succeeded with " + file +
- " insert:" + (_endInsert - _startInsert) + " lookup:" + (_endLookup - _startLookup));
+ " insert:" + (_endInsert - _startInsert) +
+ " read:" + (_endRead - _startRead) +
+ " lookup:" + (_endLookup - _startLookup));
}
public static class Bytes {
Modified: projects/xa2/object-pool/src/CompMemTrie.java
===================================================================
--- projects/xa2/object-pool/src/CompMemTrie.java 2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/CompMemTrie.java 2008-04-24 08:48:52 UTC (rev 844)
@@ -18,12 +18,14 @@
public class CompMemTrie {
@SuppressWarnings("serial")
public static class NotFound extends Exception {};
+ @SuppressWarnings("serial")
public static class InsufficientSpace extends Exception
{ public InsufficientSpace(String s) { super(s); } };
static final byte TYPE_BRANCH_TERM = 0x01; // Indicates a trie branch with a termination entry..
static final byte TYPE_BRANCH_NOTERM = 0x02; // Indicates a trie branch without a termination entry..
static final byte TYPE_LEAF = 0x03; // Indicates a trie branch without a termination entry..
+ static final byte TYPE_ROOT_LOC = 0x04; // Indicates the following short indicates a branch.
public final static int MAGIC = 0x00020001;
public final static int INVERT_MAGIC = 0x01000200;
@@ -57,7 +59,7 @@
if (root == null) {
throw new IllegalStateException("Attempt to write empty trie");
}
- int indexreq = root.totalIndexSize() + 4 + 2; // + sizeof(MAGIC) + sizeof(ptr_to_root)
+ int indexreq = root.totalIndexSize() + 4 + 4; // + sizeof(MAGIC) + sizeof(root_loc_type + root_loc)
if (indexreq > index.remaining()) {
throw new InsufficientSpace("Attempt to write trie index to bytebuffer with insufficient space");
}
@@ -67,7 +69,9 @@
index.putInt(MAGIC);
root.write(index, data);
- index.putShort(index.limit() - 2, root.location);
+ index.put(TYPE_ROOT_LOC);
+ index.put((byte)0xFF);
+ index.putShort(root.location);
}
public CompMemTrie(ByteBuffer index, FileChannel data) throws IOException {
@@ -81,8 +85,6 @@
}
}
- short rootLocation = index.getShort(index.limit() - 2);
-
// I should be able to replace this with a stack.
// The child->parent relationship is implicit in the ordering of the nodes in the file.
// The only thing we need to know is when to stop reading nodes and that is provided by rootLocation.
@@ -90,7 +92,9 @@
// root node.
// For now I'm hoping the HashMap will help with debugging - but it is wasteful of memory in the long run.
HashMap<Short, TrieNode> nodeMap = new HashMap<Short, TrieNode>();
- while (index.position() < rootLocation) {
+
+ short rootLocation = -1;
+ while (rootLocation == -1) {
short location = (short)index.position();
byte type = index.get();
switch (type) {
@@ -103,6 +107,12 @@
case TYPE_LEAF:
nodeMap.put(location, new TrieLeaf(index, data));
break;
+ case TYPE_ROOT_LOC:
+ index.get();
+ rootLocation = index.getShort();
+ break;
+ default:
+ throw new IllegalArgumentException("Invalid trie-node");
}
}
@@ -211,6 +221,17 @@
term = (TrieLeaf)nodeMap.get(index.getShort());
}
children = new ByteMap<TrieNode>(index, nodeMap);
+ if (term == null) {
+ // This two-step process is required to avoid the cast from Object[] to TrieNode[] inserted by java
+ // generics which triggers the obvious ClassCastException. By extracting explicitly to an Object[]
+ // first; then indexing to obtain an Object; then casting the Object to TrieNode the compiler doesn't
+ // insert the erroneous cast.
+ Object[] d = children.data;
+ aLeaf = ((TrieNode)(d[0])).aLeaf;
+ } else {
+ aLeaf = term;
+ }
+// aLeaf = (term == null) ? ((TrieNode)(children.data[0])).aLeaf : term;
}
protected void write(ByteBuffer index, FileChannel data) throws IOException {
@@ -224,7 +245,7 @@
location = (short)index.position();
index.put((term == null) ? TYPE_BRANCH_NOTERM : TYPE_BRANCH_TERM);
- index.put((byte)0x00); // Padding to keep things short-aligned.
+ index.put((byte)0xFF); // Padding to keep things short-aligned.
index.putInt(offset);
if (term != null) {
index.putShort(term.location);
@@ -360,12 +381,14 @@
index.get();
this.keyLocation = index.getLong();
data.position(keyLocation);
- ByteBuffer bb = ByteBuffer.allocateDirect(14);
+ ByteBuffer bb = ByteBuffer.allocateDirect(8+4); // sizeof(value) + sizof(length).
data.read(bb);
+ bb.flip();
value = bb.getLong();
int length = bb.getInt();
this.key = new byte[length]; // FIXME: I eventually need to replace this with a lazy load.
data.read(ByteBuffer.wrap(key));
+ this.aLeaf = this;
}
protected void insert(TrieLeaf node, int parentLcp) throws InsertAbove {
@@ -399,11 +422,11 @@
dataBuf.putLong(value);
dataBuf.putInt(key.length);
dataBuf.put(key);
- data.write(dataBuf);
+ data.write((ByteBuffer)dataBuf.flip());
location = (short)index.position();
index.put(TYPE_LEAF);
- index.put((byte)0x00); // Pad to 16-bit aligned.
+ index.put((byte)0xFF); // Pad to 16-bit aligned.
index.putLong(keyLocation);
}
Modified: projects/xa2/object-pool/src/gen/OnesTable.java
===================================================================
--- projects/xa2/object-pool/src/gen/OnesTable.java 2008-04-24 06:58:17 UTC (rev 843)
+++ projects/xa2/object-pool/src/gen/OnesTable.java 2008-04-24 08:48:52 UTC (rev 844)
@@ -5,7 +5,6 @@
*/
package gen;
-import java.io.File;
import java.io.FileInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
More information about the Mulgara-svn
mailing list