[Mulgara-svn] r1973 - in trunk/src/jar: . content-rdfa content-rdfa/java content-rdfa/java/org content-rdfa/java/org/mulgara content-rdfa/java/org/mulgara/content content-rdfa/java/org/mulgara/content/rdfa

Fri Aug 20 02:41:38 UTC 2010

Author: pag
Date: 2010-08-20 02:41:38 +0000 (Fri, 20 Aug 2010)
New Revision: 1973

Added:
   trunk/src/jar/content-rdfa/
   trunk/src/jar/content-rdfa/build.xml
   trunk/src/jar/content-rdfa/content-rdfa-build.properties
   trunk/src/jar/content-rdfa/java/
   trunk/src/jar/content-rdfa/java/org/
   trunk/src/jar/content-rdfa/java/org/mulgara/
   trunk/src/jar/content-rdfa/java/org/mulgara/content/
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/BasedResolver.java
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaContentHandler.java
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatements.java
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatementsUnitTest.java
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/StatementParser.java
   trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/package.html
Log:
Fixing the RDFa code that should have already been added

Added: trunk/src/jar/content-rdfa/build.xml
===================================================================

--- trunk/src/jar/content-rdfa/build.xml	                        (rev 0)
+++ trunk/src/jar/content-rdfa/build.xml	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,122 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE project>
+
+<!-- =================================================================== -->
+<!--                       Project definition                            -->
+<!-- =================================================================== -->
+<project name="content-rdfa" default="content-rdfa-jar" basedir="../../..">
+
+  <!-- =================================================================== -->
+  <!--                      Property Definitions                           -->
+  <!-- =================================================================== -->
+
+  <!-- =================================================================== -->
+  <!--                              Imports                                -->
+  <!-- =================================================================== -->
+
+  <!-- =================================================================== -->
+  <!--                          Path Definitions                           -->
+  <!-- =================================================================== -->
+  <path id="content-rdfa-classpath">
+    <path refid="common-classpath"/>
+    <fileset file="${query.dist.dir}/${query.jar}"/>
+    <fileset file="${resolver-spi.dist.dir}/${resolver-spi.jar}"/>
+    <fileset file="${resolver-file.dist.dir}/${resolver-file.jar}"/>
+    <fileset file="${tuples.dist.dir}/${tuples.jar}"/>
+    <fileset file="${util.dist.dir}/${util.jar}"/>
+    <fileset file="${lib.dir}/${htmlparser.jar}"/>
+    <fileset file="${lib.dir}/${java-rdfa.jar}"/>
+  </path>
+
+  <path id="content-rdfa-test-classpath">
+    <path refid="content-rdfa-classpath"/>
+    <!-- resolver-file.jar is not repeated here: content-rdfa-classpath already supplies it -->
+    <fileset file="${store-stringpool-memory.dist.dir}/${store-stringpool-memory.jar}"/>
+    <fileset file="${store-stringpool-xa.dist.dir}/${store-stringpool-xa.jar}"/>
+    <fileset file="${content-rdfa.dist.dir}/${content-rdfa.jar}"/>
+  </path>
+
+
+  <target name="content-rdfa-clean" description="Removes all compile generated files for content-rdfa">
+    <tstamp/>
+    <delete dir="${content-rdfa.obj.dir}"/>
+    <delete dir="${content-rdfa.test.dir}"/>
+    <delete dir="${content-rdfa.dist.dir}"/>
+  </target>
+
+
+  <target name="-content-rdfa-prepare"
+          description="Creates all directories associated with the content-rdfa's compilation"
+          depends="-prepare-build">
+    <mkdir dir="${content-rdfa.obj.dir}"/>
+    <mkdir dir="${content-rdfa.test.dir}"/>
+    <mkdir dir="${content-rdfa.dist.dir}"/>
+    <mkdir dir="${content-rdfa.obj.dir}/classes"/>
+  </target>
+
+
+  <target name="content-rdfa-compile"
+          depends="-content-rdfa-prepare, resolver-spi-jar, resolver-file-jar"
+          description="Compiles all content-rdfa related files">
+    <javac destdir="${content-rdfa.obj.dir}/classes" debug="on" deprecation="on" source="1.5" encoding="UTF-8">
+      <classpath refid="content-rdfa-classpath"/>
+      <src path="${content-rdfa.src.dir}/java"/>
+    </javac>
+  </target>
+
+
+  <target name="content-rdfa-jar" depends="content-rdfa-compile, -content-rdfa-jar-uptodate"
+          unless="content-rdfa.jar.uptodate" description="Builds the rdfa content handler JAR">
+
+    <jar jarfile="${content-rdfa.dist.dir}/${content-rdfa.jar}">
+      <fileset dir="${content-rdfa.obj.dir}/classes"/>
+      <zipfileset src="${lib.dir}/${htmlparser.jar}" excludes="META-INF/**"/>
+      <zipfileset src="${lib.dir}/${java-rdfa.jar}" excludes="META-INF/**"/>
+    </jar>
+  </target>
+
+
+  <target name="-content-rdfa-jar-uptodate">
+    <uptodate property="content-rdfa.jar.uptodate" targetfile="${content-rdfa.dist.dir}/${content-rdfa.jar}">
+      <srcfiles dir="${content-rdfa.obj.dir}/classes" includes="**/*"/>
+    </uptodate>
+  </target>
+
+
+  <target name="content-rdfa-dist" depends="content-rdfa-jar" unless="content-rdfa.uptodate"
+          description="Performs all tasks related to finalising this content-rdfa and readying it for distribution">
+  </target>
+
+
+  <target name="content-rdfa-test"
+          depends="content-rdfa-jar, resolver-file-jar, store-stringpool-memory-jar, store-stringpool-xa-jar">
+    <antcall target="component-test">
+      <param name="classpath.id" value="content-rdfa-test-classpath"/>
+      <param name="dir" value="${content-rdfa.src.dir}"/>
+      <param name="jar" value="${content-rdfa.jar}"/>
+    </antcall>
+  </target>
+
+
+  <target name="content-rdfa-javadoc" depends="content-rdfa-jar" description="Creates the javadoc for this content-rdfa">
+    <antcall target="javadoc">
+      <param name="javadoc.package" value="org.mulgara.content.rdfa.*"/>
+      <param name="javadoc.classpath" value="content-rdfa-classpath"/>
+      <param name="javadoc.sourcepath" value="${content-rdfa.src.dir}/java"/>
+    </antcall>
+  </target>
+
+
+  <target name="content-rdfa-help" description="Displays the help information for this content-rdfa">
+    <echo message="Welcome to the build script for ${content-rdfa.name}."/>
+    <echo message=""/>
+    <echo message="These targets can be invoked as follows:"/>
+    <echo message="  ./build.sh &lt;target&gt;"/>
+    <echo message=""/>
+    <java fork="false" classname="org.apache.tools.ant.Main" newenvironment="false">
+      <jvmarg value="${arch.bits}"/>
+      <arg line="-buildfile ${content-rdfa.src.dir}/build.xml -projecthelp"/>
+    </java>
+  </target>
+
+</project>

Added: trunk/src/jar/content-rdfa/content-rdfa-build.properties
===================================================================
--- trunk/src/jar/content-rdfa/content-rdfa-build.properties	                        (rev 0)
+++ trunk/src/jar/content-rdfa/content-rdfa-build.properties	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,14 @@
+#
+# Properties used by the RDFa content handler module
+#
+
+# Module Name
+content-rdfa.name          = Content RDFa
+
+# General module properties
+content-rdfa.src.dir       = ${jar.src.dir}/content-rdfa
+content-rdfa.obj.dir       = ${jar.obj.dir}/content-rdfa
+content-rdfa.dist.dir      = ${bin.dir}
+content-rdfa.test.dir      = ${test.dir}/content-rdfa
+content-rdfa.jxtest.dir    = ${jxtest.dir}/content-rdfa
+content-rdfa.jar           = content-rdfa-base-${mulgara-version}.jar

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/BasedResolver.java
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/BasedResolver.java	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/BasedResolver.java	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2010 Paul Gearon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.mulgara.content.rdfa;
+
+import org.apache.log4j.Logger;      // Apache Log4J
+
+import com.hp.hpl.jena.iri.IRI;
+import com.hp.hpl.jena.iri.IRIFactory;
+
+import net.rootdev.javardfa.Resolver;
+
+/**
+ * Resolves possibly-relative IRIs against a base IRI, keeping the parsed base for reuse.
+ * @created 2010-08-09
+ * @author Paul Gearon
+ */
+class BasedResolver implements Resolver {
+
+  /** Logger. */
+  @SuppressWarnings("unused")
+  private static final Logger logger = Logger.getLogger(BasedResolver.class.getName());
+
+  /** The factory for creating new IRIs. */
+  private final IRIFactory iriFactory;
+
+  /** The main IRI for determining relative IRIs in relation to. */
+  private IRI base;
+
+  /** The string of the IRI that is the base. */
+  private String baseStr;
+
+  /**
+   * Constructs a resolver with just the base.
+   * @param baseStr The string for the IRI to use as the base of relative IRIs.
+   */
+  public BasedResolver(String baseStr) {
+    this(baseStr, IRIFactory.semanticWebImplementation());
+  }
+
+  /**
+   * Constructs a resolver.
+   * @param baseStr The string for the IRI to use as the base of relative IRIs.
+   * @param iriFactory The factory for creating IRIs from.
+   */
+  public BasedResolver(String baseStr, IRIFactory iriFactory) {
+    this.iriFactory = iriFactory;
+    setBase(baseStr);
+  }
+
+  /**
+   * Changes the base to use.
+   * @param baseStr The new base. <code>null</code> will be ignored.
+   */
+  public void setBase(String baseStr) {
+    if (baseStr != null) {
+      base = iriFactory.construct(baseStr);  // build first, so a failure leaves both fields untouched
+      this.baseStr = baseStr;
+    }
+  }
+
+
+  /**
+   * Resolves an IRI relative to a given base.
+   * @param baseStr A string form of the base IRI, or <code>null</code> to use the current base.
+   * @param rel An IRI that may be relative to the base.
+   * @return a string containing the lexical form of the calculated IRI. When no base is
+   *         available at all, <var>rel</var> is constructed on its own instead of failing.
+   */
+  public String resolve(String baseStr, String rel) {
+    IRI bIri = base;
+    // only build a new IRI when the caller supplies a base other than the one held here
+    if (baseStr != null && !baseStr.equals(this.baseStr)) {
+      bIri = iriFactory.construct(baseStr);
+    }
+    // base stays null if it was never set (setBase ignores null): avoid a NullPointerException
+    IRI resolved = (bIri == null) ? iriFactory.construct(rel) : bIri.resolve(rel);
+    return resolved.toString();
+  }
+
+}

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaContentHandler.java
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaContentHandler.java	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaContentHandler.java	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2010 Paul Gearon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.mulgara.content.rdfa;
+
+// Java packages
+// Java 2 enterprise packages
+import javax.activation.MimeType;
+import javax.activation.MimeTypeParseException;
+
+// Third party packages
+import org.apache.log4j.Logger;  // Apache Log4J
+
+// Local packages
+import org.mulgara.content.Content;
+import org.mulgara.content.ContentHandler;
+import org.mulgara.content.ContentHandlerException;
+import org.mulgara.content.ModifiedException;
+import org.mulgara.content.NotModifiedException;
+import org.mulgara.resolver.spi.ResolverSession;
+import org.mulgara.resolver.spi.Statements;
+
+/**
+ * Content handler that reads RDF statements out of RDFa in HTML and XHTML documents
+ *
+ * @created 2010-08-09
+ * @author Paul Gearon
+ */
+public class RdfaContentHandler implements ContentHandler {
+  /** Logger. */
+  @SuppressWarnings("unused")
+  private static Logger logger = Logger.getLogger(RdfaContentHandler.class);
+
+  /** MIME type matched for XHTML. NOTE(review): this is application/xml, not application/xhtml+xml - confirm. */
+  static final MimeType XHTML_MIME;
+
+  /** The MIME type for HTML documents */
+  static final MimeType HTML_MIME;
+
+  static {
+    try {
+      XHTML_MIME = new MimeType("application", "xml");
+      HTML_MIME = new MimeType("text", "html");
+    } catch (MimeTypeParseException e) {
+      throw new Error("Mime initialization error", e);  // keep the cause for diagnosis
+    }
+  }
+
+  /** Wraps the content in {@link RdfaStatements}; any failure is rethrown with its cause attached. */
+  public Statements parse(Content content, ResolverSession resolverSession) throws ContentHandlerException {
+    try {
+      return new RdfaStatements(resolverSession, content);
+    } catch (Exception e) {
+      throw new ContentHandlerException(e.getMessage(), e);
+    }
+  }
+
+  /**
+   * @return <code>true</code> if the content type matches text/html or application/xml,
+   *   or failing that, if the path of the content's URI ends in <code>.html</code>
+   */
+  public boolean canParse(Content content) throws NotModifiedException {
+    // A matching MIME type is sufficient on its own
+    MimeType contentType = content.getContentType();
+    if (contentType != null && (HTML_MIME.match(contentType) || XHTML_MIME.match(contentType))) {
+      return true;
+    }
+
+    if (content.getURI() == null) return false;
+
+    // Otherwise fall back to the extension on the path part of the URI
+    String path = content.getURI().getPath();
+    if (path == null) return false;
+    return path.endsWith(".html");
+  }
+
+  /**
+   * Cannot serialize RDF into HTML.
+   * @throws UnsupportedOperationException always
+   */
+  public void serialize(Statements statements, Content content, ResolverSession resolverSession)
+      throws ContentHandlerException, ModifiedException {
+    throw new UnsupportedOperationException();
+  }
+}

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatements.java
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatements.java	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatements.java	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2010 Paul Gearon.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.mulgara.content.rdfa;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.jrdf.graph.ObjectNode;
+import org.jrdf.graph.PredicateNode;
+import org.jrdf.graph.SubjectNode;
+import org.jrdf.graph.Triple;
+import org.mulgara.content.Content;
+import org.mulgara.content.NotModifiedException;
+import org.mulgara.query.TuplesException;
+import org.mulgara.resolver.spi.LocalizeException;
+import org.mulgara.resolver.spi.ResolverSession;
+import org.mulgara.resolver.spi.Statements;
+import org.mulgara.store.tuples.AbstractTuples;
+import org.mulgara.store.tuples.Tuples;
+
+/**
+ * This class works in tandem with a parser to provide Statements with a cursor
+ * interface. The parser keeps a buffer, and this class draws triples out of that
+ * buffer to provide statements as required.
+ */
+public class RdfaStatements extends AbstractTuples implements Statements {
+
+  /** The resolver session to convert parsed data into localized data. */
+  ResolverSession session;
+
+  /** The source of the statements. */
+  StatementParser source;
+
+  /** The current row. */
+  Triple currentRow = null;
+
+  /** Flag to indicate that the {@link #beforeFirst()} method has been called. */
+  boolean beforeFirstCalled = false;
+
+  /**
+   * Initialize the statements with the parser that is to be the source
+   * of the statements. The parser is started on a new thread straight away.
+   */
+  public RdfaStatements(ResolverSession session, Content content) throws TuplesException, NotModifiedException {
+    this.session = session;
+    source = new StatementParser(content, session);
+    new Thread(source).start();
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.Tuples#hasNoDuplicates()
+   */
+  public boolean hasNoDuplicates() throws TuplesException {
+    // no idea, so return false
+    return false;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.Tuples#getOperands()
+   */
+  public List<Tuples> getOperands() {
+    return Collections.emptyList();
+  }
+
+  /**
+   * @see org.mulgara.resolver.spi.Statements#getSubject()
+   */
+  @Override
+  public long getSubject() throws TuplesException {
+    SubjectNode s = currentRow.getSubject();
+    try {
+      return session.localize(s);
+    } catch (LocalizeException e) {
+      throw new TuplesException("Unable to localize subject: " + s, e);
+    }
+  }
+
+  /**
+   * @see org.mulgara.resolver.spi.Statements#getPredicate()
+   */
+  @Override
+  public long getPredicate() throws TuplesException {
+    PredicateNode p = currentRow.getPredicate();
+    try {
+      return session.localize(p);
+    } catch (LocalizeException e) {
+      throw new TuplesException("Unable to localize predicate: " + p, e);
+    }
+  }
+
+  /**
+   * @see org.mulgara.resolver.spi.Statements#getObject()
+   */
+  @Override
+  public long getObject() throws TuplesException {
+    ObjectNode o = currentRow.getObject();
+    try {
+      return session.localize(o);
+    } catch (LocalizeException e) {
+      throw new TuplesException("Unable to localize object: " + o, e);
+    }
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#getColumnValue(int)
+   */
+  @Override
+  public long getColumnValue(int column) throws TuplesException {
+    if (column == 0) return getSubject();
+    if (column == 1) return getPredicate();
+    if (column == 2) return getObject();
+    throw new IndexOutOfBoundsException("Statements have 3 columns: " + column);
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#getRowUpperBound()
+   */
+  public long getRowUpperBound() throws TuplesException {
+    // go for the max number of integers, not longs, since this is more reasonable
+    return source.isFinished() ? source.getStatementCount() : Integer.MAX_VALUE;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#getRowExpectedCount()
+   */
+  public long getRowExpectedCount() throws TuplesException {
+    return source.isFinished() ? source.getStatementCount() : Short.MAX_VALUE;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#isColumnEverUnbound(int)
+   */
+  public boolean isColumnEverUnbound(int column) throws TuplesException {
+    return false;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#beforeFirst(long[], int)
+   */
+  @Override
+  public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
+    // Validate params
+    if (prefix != null && prefix.length != 0) {
+      throw new IllegalArgumentException("Prefix on RDFa statements must be empty");
+    }
+    if (suffixTruncation != 0) {
+      throw new IllegalArgumentException("Nonzero \"suffixTruncation\" is not supported on RDFa statements");
+    }
+
+    if (beforeFirstCalled) throw new TuplesException("RDFa statements do not support rewinding");
+    beforeFirstCalled = true;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#next()
+   */
+  @Override
+  public boolean next() throws TuplesException {
+    try {
+      currentRow = source.getTriple();
+    } catch (TuplesException ex) {
+      source.terminate();
+      throw ex;
+    }
+
+    if (currentRow == null) {
+      rowCount = source.getStatementCount();
+      source.terminate();
+    }
+    return currentRow != null;
+  }
+
+  /**
+   * @see org.mulgara.store.tuples.AbstractTuples#close()
+   */
+  @Override
+  public void close() throws TuplesException {
+    source.terminate();
+  }
+
+}

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatementsUnitTest.java
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatementsUnitTest.java	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/RdfaStatementsUnitTest.java	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2010 Paul Gearon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.mulgara.content.rdfa;
+
+// Third party packages
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import javax.activation.MimeType;
+import javax.activation.MimeTypeParseException;
+
+import junit.framework.*;        // JUnit unit testing framework
+import org.apache.log4j.Logger;  // Apache Log4J
+import org.jrdf.graph.AbstractTriple;
+import org.jrdf.graph.BlankNode;
+import org.jrdf.graph.URIReference;
+import org.jrdf.graph.Node;
+import org.jrdf.graph.ObjectNode;
+import org.jrdf.graph.PredicateNode;
+import org.jrdf.graph.SubjectNode;
+import org.jrdf.graph.Triple;
+import org.mulgara.content.Content;
+// import org.mulgara.resolver.http.HttpContent;
+import org.mulgara.resolver.spi.ResolverSession;
+import org.mulgara.resolver.spi.Statements;
+import org.mulgara.resolver.spi.TestResolverSession;
+
+
+/**
+ * @created August 9, 2010
+ * @author Paul Gearon
+ */
+public class RdfaStatementsUnitTest extends TestCase {
+  /** Logger. */
+  @SuppressWarnings("unused")
+  private static final Logger logger = Logger.getLogger(RdfaStatementsUnitTest.class.getName());
+
+  //
+  // Constructors
+  //
+
+  /**
+   * Construct a test.
+   *
+   * @param name  the name of the test to construct
+   */
+  public RdfaStatementsUnitTest(String name) {
+    super(name);
+  }
+
+  //
+  // Methods implementing TestCase
+  //
+
+  public void setUp() {  // JUnit 3 only invokes this hook when it is spelled setUp
+  }
+
+  /**
+   * Hook from which the test runner can obtain a test suite.
+   *
+   * @return the test suite
+   */
+  public static Test suite() {
+    return new TestSuite(RdfaStatementsUnitTest.class);
+  }
+
+
+  /**
+   * Smoke test: runs a {@link StatementParser} over the hCard page on the calling thread; it makes no assertions.
+   */
+  public void testParse() throws Exception {
+    Content content = new StringContent(HCARD);
+    // Content content = new HttpContent(URI.create("http://examples.tobyinkster.co.uk/hcard"));
+    System.err.println(content.getContentType().toString());
+    StatementParser parser = new StatementParser(content, new TestResolverSession());
+    parser.run();
+  }
+
+  public void testParseData() throws Exception {
+    ResolverSession session = new TestResolverSession();
+    Content content = new StringContent(HCARD);
+    // Content content = new HttpContent(URI.create("http://examples.tobyinkster.co.uk/hcard"));
+    List<Triple> triples = toTriples(new RdfaStatements(session, content), session);
+    assertEquals(8, triples.size());
+    URI page = URI.create("http://examples.tobyinkster.co.uk/hcard");
+    for (int i = 0; i < 2; i++) assertEquals(page, ((URIReference)triples.get(i).getSubject()).getURI());
+    assertTrue(triples.get(2).getSubject().isBlankNode());
+    URI jack = URI.create("http://examples.tobyinkster.co.uk/hcard#jack");
+    for (int i = 3; i < 8; i++) {
+      assertTrue(triples.get(i).getSubject().isURIReference());
+      assertEquals(jack, ((URIReference)triples.get(i).getSubject()).getURI());
+    }
+  }
+
+  static List<Triple> toTriples(Statements s, ResolverSession session) throws Exception {
+    List<Triple> triples = new ArrayList<Triple>();
+    s.beforeFirst();
+    while (s.next()) {
+      triples.add(new TripleImpl(
+          session.globalize(s.getSubject()),
+          session.globalize(s.getPredicate()),
+          session.globalize(s.getObject())
+      ));
+    }
+    return triples;
+  }
+  
+  static final String HCARD = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML+RDFa 1.0//EN\"\n" +
+  "          \"http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd\">\n" +
+  "\n" +
+  "<html xml:lang=\"en\"\n" +
+  " xmlns=\"http://www.w3.org/1999/xhtml\"\n" +
+  " xmlns:vcard=\"urn:ietf:rfc:2426#\"\n" +
+  " xmlns:foaf=\"http://xmlns.com/foaf/0.1/\"\n" +
+  " xmlns:w3card=\"http://www.w3.org/2006/vcard/ns#\"\n" +
+  " xmlns:pim=\"http://www.w3.org/2000/10/swap/pim/contact#\"\n" +
+  " xmlns:dc=\"http://purl.org/dc/terms/\">\n" +
+  "\n" +
+  " <head>\n" +
+  "   <title>Complex hCard + RDFa Example</title>\n" +
+  "   <link rel=\"foaf:primaryTopic\" href=\"#jack\" />\n" +
+  " </head>\n" +
+  "\n" +
+  " <body style=\"max-width:50em\">\n" +
+  "\n" +
+  "   <p style=\"font-style:italic\">No, I'm not obsessed with Jack Bauer (well, maybe\n" +
+  "   a little bit). <span property=\"dc:abstract\">This page is intended to be a demonstration of\n" +
+  "   the use of RDFa (including FOAF, Dublin Core and W3C PIM vocabularies) in\n" +
+  "   conjunction with Microformats (including hCard and rel-tag).</span></p>\n" +
+  "\n" +
+  "   <div id=\"jack\" class=\"vcard\" typeof=\"pim:Male\">\n" +
+  "\n" +
+  "     <img class=\"photo\" alt=\"Jack could kick your ass.\" src=\"JackB4.jpg\" style=\"float:right;\n" +
+  "     margin:1em 0 1em 2em;    /* Don't let Jack too near the text as he might kick its ass! */\n" +
+  "     border: 4px solid black; /* But don't fool yourself into thinking that will constrain him! */\"\n" +
+  "     />\n" +
+  "\n" +
+  "     <h1 class=\"fn\">Jack Bauer</h1>\n" +
+  "\n" +
+  "     <p class=\"org\">\n" +
+  "       <span about=\"#jack\" property=\"w3card:category\" class=\"organization-name\">Counter-Terrorist Unit</span>\n" +
+  "       (<span class=\"organization-unit\">Los Angeles Division</span>)\n" +
+  "     </p>\n" +
+  "\n" +
+  "     <p class=\"adr\">\n" +
+  "       <span class=\"street-address\">10201 W. Pico Blvd.</span><br />\n" +
+  "       <span class=\"locality\">Los Angeles</span>,\n" +
+  "       <span class=\"region\">CA</span>\n" +
+  "       <span class=\"postal-code\">90064</span><br />\n" +
+  "       <span class=\"country-name\">United States</span><br />\n" +
+  "       <small class=\"geo\" style=\"color:#999;font-size:67%\">34.052339;-118.410623</small>\n" +
+  "     </p>\n" +
+  "\n" +
+  "     <h2>Assorted Contact Methods</h2>\n" +
+  "     <ul about=\"#jack\">\n" +
+  "       <li class=\"tel\">+1 (310) 597 3781 <span class=\"type\">work</span></li>\n" +
+  "       <li><a rel=\"tag foaf:homepage\" href=\"http://en.wikipedia.org/wiki/Jack_Bauer\">I'm on Wikipedia</a> so you can leave a message on my user talk page.</li>\n" +
+  "       <li rel=\"foaf:workInfoHomepage\"><a href=\"http://www.jackbauerfacts.com/\">Jack Bauer Facts</a></li>\n" +
+  "       <li class=\"email\">j.bauer at la.ctu.gov.invalid</li>\n" +
+  "       <li><a rel=\"w3card:mobileTel\" href=\"tel:+1-310-555-3781\">mobile phone</a></li>\n" +
+  "     </ul>\n" +
+  "\n" +
+  "     <p class=\"note\">If I'm out in the field, you may be better off contacting <span class=\"agent vcard\">\n" +
+  "     <a class=\"email fn\" href=\"mailto:c.obrian at la.ctu.gov.invalid\">Chloe O'Brian</a></span>\n" +
+  "     if it's about work, or ask <span class=\"agent\">Tony Almeida</span> if you're interested\n" +
+  "     in the CTU five-a-side football team we're trying to get going.</p>\n" +
+  "\n" +
+  "     <h2>Plan</h2>\n" +
+  "     <p about=\"#jack\" property=\"foaf:plan\">I will kick your terrorist ass!</p>\n" +
+  "\n" +
+  "     <ins class=\"tel rev\" datetime=\"2008-07-20T21:00:00+0100\">\n" +
+  "       <strong>Update!</strong>\n" +
+  "       My new <span class=\"type\">home</span> phone number is\n" +
+  "       <span class=\"value\">01632 960 123</span>.\n" +
+  "     </ins>\n" +
+  "   </div>\n" +
+  "\n" +
+  "\n" +
+  "   <div style=\"border-top: 1px solid silver;margin-top:2em;padding-top:0.67em\">\n" +
+  "\n" +
+  "     <a href=\"http://validator.w3.org/check?uri=referer\">validate</a> |\n" +
+  "\n" +
+  "     <a href=\"http://srv.buzzword.org.uk/vcard/referer\">cognify (vCard)</a> | \n" +
+  "\n" +
+  "     <a href=\"http://srv.buzzword.org.uk/jcard/referer\">cognify (jCard)</a> | \n" +
+  "\n" +
+  "     <a href=\"http://srv.buzzword.org.uk/rdf-xml/referer\">cognify (RDF)</a> | \n" +
+  "\n" +
+  "     tech:\n" +
+  "     <a href=\"http://rdfa.info/\"><img style=\"border:0;vertical-align:middle\" src=\"http://buzzword.org.uk/cognition/buttons/rdfa.png\" alt=\"RDFa,\" /></a>\n" +
+  "     <a href=\"http://www.foaf-project.org/\"><img style=\"border:0;vertical-align:middle\" src=\"http://buzzword.org.uk/cognition/buttons/foaf.png\" alt=\"FOAF,\" /></a>\n" +
+  "     <a href=\"http://dublincore.org/\"><img style=\"border:0;vertical-align:middle\" src=\"http://buzzword.org.uk/cognition/buttons/dc.png\" alt=\"Dublin Core and\" /></a>\n" +
+  "     <a href=\"http://microformats.org/wiki/hcard\"><img style=\"border:0;vertical-align:middle\" src=\"http://buzzword.org.uk/cognition/buttons/hcard.png\" alt=\"hCard\" /></a>\n" +
+  "\n" +
+  "   </div>\n" +
+  "\n" +
+  " </body>\n" +
+  "\n" +
+  "</html>\n";
+}
+
+class StringContent implements Content {
+  private final String content;
+
+  public StringContent(String content) {
+    this.content = content;
+  }
+
+  public Map<Object,BlankNode> getBlankNodeMap()         { return null; }
+  public MimeType getContentType()                       {
+    try {
+      return new MimeType("text", "html");
+    } catch (MimeTypeParseException e) { return null; }
+  }
+  public URI getURI()                                    { return URI.create("http://examples.tobyinkster.co.uk/hcard"); }
+  public String getURIString()                           { return "http://examples.tobyinkster.co.uk/hcard"; }
+  public OutputStream newOutputStream()                  { return null; }
+
+  public InputStream newInputStream() throws IOException {
+    return new ByteArrayInputStream(content.getBytes("UTF-8"));
+  }
+}
+
+class TripleImpl extends AbstractTriple {
+  private static final long serialVersionUID = -26504102803266709L;
+  TripleImpl(Node node, Node node2, Node node3) {
+    subjectNode = (SubjectNode)node;
+    predicateNode = (PredicateNode)node2;
+    objectNode = (ObjectNode)node3;
+  }
+}
+

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/StatementParser.java
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/StatementParser.java	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/StatementParser.java	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,442 @@
+/*
+ * Copyright 2010 Paul Gearon
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.mulgara.content.rdfa;
+
+// Java 2 standard packages
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+import javax.activation.MimeType;
+
+// Third party packages
+import org.apache.log4j.Logger;      // Apache Log4J
+import org.jrdf.graph.BlankNode;     // JRDF
+import org.jrdf.graph.Literal;
+import org.jrdf.graph.Node;
+import org.jrdf.graph.ObjectNode;
+import org.jrdf.graph.PredicateNode;
+import org.jrdf.graph.SubjectNode;
+import org.jrdf.graph.Triple;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+
+import net.rootdev.javardfa.ParserFactory.Format;
+import net.rootdev.javardfa.StatementSink;
+import net.rootdev.javardfa.ParserFactory;
+
+
+// Locally written packages
+import org.mulgara.content.Content;
+import org.mulgara.content.NotModifiedException;
+import org.mulgara.parser.MulgaraParserException;
+import org.mulgara.query.TuplesException;
+import org.mulgara.query.rdf.BlankNodeImpl;
+import org.mulgara.query.rdf.LiteralImpl;
+import org.mulgara.query.rdf.TripleImpl;
+import org.mulgara.query.rdf.URIReferenceImpl;
+import org.mulgara.resolver.spi.ResolverSession;
+import org.mulgara.util.IntFile;
+import org.mulgara.util.TempDir;
+
+/**
+ *
+ * @created 2010-08-09
+ * @author Paul Gearon
+ */
+/**
+ * Parses RDFa out of a {@link Content} object and buffers the resulting triples.
+ * <p>
+ * An instance is a {@link Runnable}: {@link #run()} drives the XML reader over the
+ * content, and the reader reports what it finds back to this object through the
+ * {@link StatementSink} methods. Each reported statement is converted to a triple and
+ * placed on a bounded queue, which a consumer drains with {@link #getTriple()}.
+ * The queue, the statement count and the completion state are guarded by this
+ * object's monitor.
+ *
+ * @created 2010-08-09
+ * @author Paul Gearon
+ */
+class StatementParser implements Runnable, StatementSink {
+
+  /** Logger. */
+  private static final Logger logger = Logger.getLogger(StatementParser.class.getName());
+
+  /** Text prefix for blank nodes. */
+  @SuppressWarnings("unused")
+  private static final String BLANK_PREFIX = "_:";
+
+  /** The prefix that rdfa-java uses */
+  private static final String RJ_PREFIX = "_:node";
+
+  /**
+   * Maximum size for the {@link #triples} buffer. Once the buffer holds this many
+   * triples the parser blocks until the buffer has been drained.
+   */
+  private static final long BUFFER_SIZE = 1000;
+
+  /** Mapping between parsed blank node IDs and local node numbers. */
+  private IntFile blankNodeIdMap;
+
+  /** Mapping between blank node IDs and blank-node instances that haven't been stored. */
+  private Map<Long,BlankNodeImpl> blankNodeInstMap = new HashMap<Long,BlankNodeImpl>();
+
+  /** The resolverSession to create new internal identifiers for blank nodes. */
+  private ResolverSession resolverSession;
+
+  /** The data to be parsed and its metadata */
+  private final Content content;
+
+  /** The stream containing the data to be parsed. */
+  private InputStream inputStream;
+
+  /** The parser for the input stream. */
+  private XMLReader reader;
+
+  /** The base of the document. */
+  private URI base;
+
+  /** Resolves relative URIs and IRIs to absolute URIs/IRIs */
+  private BasedResolver parseResolver;
+
+  /** The queue of triples generated by the parser. */
+  private LinkedList<Triple> triples = new LinkedList<Triple>();
+
+  /** The number of parsed statements */
+  private long statementCount = 0;
+
+  /** Indicates that parsing is complete */
+  private boolean finished = false;
+
+  /** Used to asynchronously indicate an exception. */
+  private Throwable exception = null;
+
+  /** The thread currently executing {@link #run()}, or <code>null</code> when not running. */
+  private Thread parserThread = null;
+
+  /** Set once {@link #terminate()} has been called. */
+  private boolean terminated = false;
+
+  /**
+   * Sets up the sink to start receiving triples.
+   * @param content Contains the data for parsing and its metadata.
+   * @param resolverSession Access to the database for inserting data.
+   * @throws IllegalArgumentException if either parameter is <code>null</code>.
+   * @throws NotModifiedException if raised while determining the content type.
+   * @throws TuplesException if the parser could not be set up for any other reason.
+   */
+  StatementParser(Content content, ResolverSession resolverSession) throws NotModifiedException, TuplesException {
+    if (content == null) throw new IllegalArgumentException("Null \"content\" parameter");
+    if (resolverSession == null) throw new IllegalArgumentException("Null \"resolverSession\" parameter");
+
+    this.content = content;
+    this.resolverSession = resolverSession;
+    try {
+      this.blankNodeIdMap = IntFile.open(TempDir.createTempFile("rdfaidmap", null), true);
+      this.inputStream = content.newInputStream();
+      this.base = content.getURI();
+      parseResolver = new BasedResolver(content.getURIString());
+      reader = ParserFactory.createReaderForFormat(this, getType(content), parseResolver);
+    } catch (NotModifiedException e) {
+      // this is part of the declared contract, so it must not be wrapped
+      closeInput();
+      throw e;
+    } catch (Exception e) {
+      // don't leak the stream if it was opened before the failure
+      closeInput();
+      throw new TuplesException("Unable to obtain input stream from " + content.getURI(), e);
+    }
+  }
+
+  /**
+   * @return the number of statements parsed so far
+   */
+  synchronized long getStatementCount() {
+    return statementCount;
+  }
+
+  /**
+   * Blocks until parsing is complete, discarding every triple that arrives in the meantime.
+   * @return the total number of statements in the file
+   * @throws TuplesException if the parser failed, or if the waiting thread was interrupted.
+   */
+  synchronized long waitForStatementTotal() throws TuplesException {
+    while (!finished) {
+      checkForException();
+
+      // ignoring all incoming data
+      triples.clear();
+      blankNodeInstMap.clear();
+      notifyAll();
+
+      try {
+        wait();
+      } catch (InterruptedException ex) {
+        Thread.currentThread().interrupt();    // preserve the interrupt for the caller
+        throw new TuplesException("Abort", ex);
+      }
+    }
+    checkForException();
+    return statementCount;
+  }
+
+  /**
+   * Parses the content. Any failure is recorded so that it can be reported to the
+   * consumer, and completion is always signalled, whether parsing succeeded or not.
+   * The input stream is closed when parsing ends.
+   */
+  public void run() {
+    Throwable failure = null;
+
+    try {
+      synchronized (this) {
+        // terminated before starting: the finally block still signals completion
+        if (terminated) return;
+        // remember who is parsing, so terminate() can interrupt the right thread
+        parserThread = Thread.currentThread();
+      }
+      reader.parse(new InputSource(inputStream));
+      if (logger.isDebugEnabled()) logger.debug("Parsed RDFa on " + content.getURI());
+    } catch (Throwable th) {
+      failure = th;
+      logger.warn("Exception while parsing RDFa on " + content.getURI(), th);
+    } finally {
+      closeInput();
+      synchronized (this) {
+        parserThread = null;
+        if (failure != null) exception = failure;
+        finished = true;
+        notifyAll();
+      }
+    }
+  }
+
+  /** Called when the RDFa document starts. Only logs the event. */
+  public void start() {
+    if (logger.isDebugEnabled()) logger.debug("Started RDFa document");
+  }
+
+  /** Called when the RDFa document ends. Marks parsing as finished and wakes any waiting threads. */
+  public void end() {
+    if (logger.isDebugEnabled()) logger.debug("End RDFa document");
+    synchronized (this) {
+      // written under the monitor, since every reader of the flag holds it
+      finished = true;
+      notifyAll();
+    }
+  }
+
+  /**
+   * Called for each prefix declaration. Only logs the declaration.
+   * @param prefix The declared prefix.
+   * @param uri The namespace that the prefix refers to.
+   */
+  public void addPrefix(String prefix, String uri) {
+    if (logger.isDebugEnabled()) logger.debug("@prefix " + prefix + ": <" + uri + "> .");
+  }
+
+  /**
+   * Updates the base used for resolving relative references. A <code>null</code> base
+   * is ignored, and an invalid one is logged and otherwise ignored.
+   * @param base The new base for the document.
+   */
+  public void setBase(String base) {
+    try {
+      if (base != null) parseResolver.setBase(base);
+    } catch (IllegalArgumentException e) {
+      logger.warn("Invalid base in RDFa file: " + base);
+    }
+  }
+
+  /**
+   * Adds a triple with a Literal as the object. A statement that cannot be converted
+   * is logged and dropped.
+   * @param subject string form of the subject.
+   * @param predicate string form of the predicate.
+   * @param lex The lexical form of the literal in the object.
+   * @param lang The language code of the literal in the object. May be <code>null</code>.
+   * @param datatype The datatype of the literal in the object. May be <code>null</code>.
+   */
+  public void addLiteral(String subject, String predicate, String lex, String lang, String datatype) {
+    try {
+      enqueue((SubjectNode)toNode(subject), (PredicateNode)toNode(predicate), toLiteral(lex, lang, datatype));
+    } catch (MulgaraParserException e) {
+      logger.error("Unable to parse. " + e.getMessage(), e);
+    }
+  }
+
+
+  /**
+   * Adds a triple with a URI or blank node as the object. A statement that cannot be
+   * converted is logged and dropped.
+   * @param subject string form of the subject.
+   * @param predicate string form of the predicate.
+   * @param object string form of the object.
+   */
+  public void addObject(String subject, String predicate, String object) {
+    try {
+      enqueue((SubjectNode)toNode(subject), (PredicateNode)toNode(predicate), (ObjectNode)toNode(object));
+    } catch (MulgaraParserException e) {
+      logger.error("Unable to parse. " + e.getMessage(), e);
+    }
+  }
+
+
+  /**
+   * Add a parsed triple to the queue, blocking while the queue is full.
+   * @param subjectNode The subject of the triple.
+   * @param predicateNode The predicate of the triple.
+   * @param objectNode The object of the triple.
+   * @throws RuntimeException if the thread is interrupted while waiting for space.
+   */
+  void enqueue(SubjectNode subjectNode, PredicateNode predicateNode, ObjectNode objectNode) {
+    if (logger.isDebugEnabled()) {
+      logger.debug("Parsed " + subjectNode + " " + predicateNode + " " + objectNode + " from " + content.getURI());
+    }
+
+    synchronized (this) {
+      // Wait for the triples buffer to drain if it's too full
+      while (triples.size() >= BUFFER_SIZE) {
+        try {
+          wait();
+        } catch (InterruptedException ex) {
+          Thread.currentThread().interrupt();    // keep the interrupt status visible
+          throw new RuntimeException("Abort", ex);
+        }
+      }
+
+      // Buffer the statement
+      triples.addLast(new TripleImpl(subjectNode, predicateNode, objectNode));
+      statementCount++;
+      notifyAll();
+    }
+  }
+
+  /**
+   * Convert the text of a parsed node into a node. A <code>null</code> text refers to
+   * the base of the document, text with the rdfa-java blank node prefix becomes a
+   * blank node, and anything else is treated as a URI.
+   *
+   * @param text The text of the node that was parsed.
+   * @return a {@link Node} formed from the text
+   * @throws MulgaraParserException The text was not a valid blank node or URI.
+   */
+  private Node toNode(String text) throws MulgaraParserException {
+    if (text == null) return new URIReferenceImpl(base);
+
+    if (text.startsWith(RJ_PREFIX)) return getBlankNode(text);
+    return toUri(text);
+  }
+
+  /**
+   * Creates a URIReference out of a string.
+   * @param text The string to convert.
+   * @return A new URIReference containing the URI from the string.
+   * @throws MulgaraParserException The text was not a valid URI.
+   */
+  private Node toUri(String text) throws MulgaraParserException {
+    try {
+      return new URIReferenceImpl(new URI(text));
+    } catch (URISyntaxException e) {
+      throw new MulgaraParserException("Invalid URI: " + text, e);
+    }
+  }
+
+  /**
+   * Create a blank node from a blank node label. The same label always maps to the
+   * same node: either the internal ID already recorded for it, or a pending
+   * instance that has yet to be allocated an ID.
+   *
+   * @param n The blank node label, which starts with the rdfa-java prefix.
+   * @return An anonymous node that the label maps to.
+   * @throws MulgaraParserException The label is not the prefix followed by a non-negative number.
+   */
+  private BlankNode getBlankNode(String n) throws MulgaraParserException {
+    long anonId;
+    try {
+      anonId = Long.parseLong(n.substring(RJ_PREFIX.length()));
+    } catch (NumberFormatException nfe) {
+      throw new MulgaraParserException("Invalid blank node: " + n, nfe);
+    }
+    if (anonId < 0) throw new MulgaraParserException("Unexpected blank node format: " + n);
+
+    synchronized (this) {
+      // look up the id in the blank node map
+      long internalId = blankNodeIdMap.getLong(anonId);
+
+      // check if the node was found
+      BlankNodeImpl blankNode;
+      if (internalId == 0) {
+        // no ID recorded yet, so share the pending instance for this label
+        blankNode = blankNodeInstMap.get(anonId);
+        if (blankNode == null) {
+          blankNode = new BlankNodeImpl();
+          blankNodeInstMap.put(anonId, blankNode);
+        }
+      } else {
+        // Found the ID, so need to recreate the anonymous resource for it
+        blankNode = new BlankNodeImpl(internalId);
+      }
+
+      return blankNode;
+    }
+  }
+
+  /**
+   * Creates a literal out of three components. A datatype takes precedence over a
+   * language code.
+   * @param text The lexical value of the literal.
+   * @param lang The language code of the literal, or <code>null</code> if not an
+   *        untyped literal with a language code.
+   * @param datatype The URI of the datatype of the literal, or <code>null</code>
+   *        if an untyped literal.
+   * @return A new literal.
+   * @throws MulgaraParserException The datatype was not a valid URI.
+   */
+  Literal toLiteral(String text, String lang, String datatype) throws MulgaraParserException {
+    if (datatype != null) {
+      assert lang == null;
+      try {
+        return new LiteralImpl(text, new URI(datatype));
+      } catch (URISyntaxException e) {
+        throw new MulgaraParserException("Invalid datatype on literal: " + text + "^^" + datatype, e);
+      }
+    }
+    if (lang != null) return new LiteralImpl(text, lang);
+    return new LiteralImpl(text);
+  }
+
+  /**
+   * If an exception occurred in the parser, throws a TuplesException that
+   * wraps the exception.
+   * @throws TuplesException if the parser recorded a failure.
+   */
+  private void checkForException() throws TuplesException {
+    if (exception != null) {
+      throw new TuplesException("Exception while reading " + content.getURIString(), exception);
+    }
+  }
+
+
+  /**
+   * Returns the oldest triple from the queue, blocking until one is available.
+   * Pending blank nodes are allocated their IDs before the triple is handed out.
+   * @return The oldest triple in the queue, or <code>null</code> if parsing has
+   *         finished and the queue is empty.
+   * @throws TuplesException if the parser failed, or if the waiting thread was interrupted.
+   */
+  synchronized Triple getTriple() throws TuplesException {
+    while (triples.isEmpty()) {
+      checkForException();
+      if (finished) return null;
+
+      // Wait for more triples.
+      try {
+        wait();
+      } catch (InterruptedException ex) {
+        Thread.currentThread().interrupt();    // preserve the interrupt for the caller
+        throw new TuplesException("Abort", ex);
+      }
+    }
+    checkForException();
+    allocateBlankNodes();
+
+    // wake the parser, in case it is waiting for space in the buffer
+    notifyAll();
+    return triples.removeFirst();
+  }
+
+  /**
+   * Allocate the ids for the new blank nodes, and record them against the labels
+   * they were parsed from.
+   * @throws RuntimeException if an ID could not be allocated or recorded.
+   */
+  private synchronized void allocateBlankNodes() {
+    try {
+      for (Map.Entry<Long, BlankNodeImpl> entry : blankNodeInstMap.entrySet()) {
+        resolverSession.localize(entry.getValue());     // This sets and returns the node ID
+        blankNodeIdMap.putLong(entry.getKey(), entry.getValue().getNodeId());
+      }
+      blankNodeInstMap.clear();
+
+    } catch (Exception le) {
+      throw new RuntimeException("Unable to create blank node", le);
+    }
+  }
+
+  /**
+   * Stops the parse. The thread executing {@link #run()} is interrupted if there is
+   * one, buffered triples are discarded, and waiting threads are woken. If the
+   * parse has not started yet then it will finish immediately when it is run.
+   */
+  synchronized void terminate() {
+    terminated = true;
+    // interrupt the parsing thread, not whichever thread happens to be calling this method
+    if (parserThread != null) parserThread.interrupt();
+    triples.clear();
+    notifyAll();
+  }
+
+  /**
+   * Tests if the parse is complete.
+   * @return <code>true</code> if parsing is over.
+   */
+  synchronized boolean isFinished() {
+    return finished;
+  }
+
+  /**
+   * Determine the type of parsing to be done, based on the content. A missing or
+   * unrecognized content type is logged and treated as HTML.
+   * @param c The Content to be parsed.
+   * @return Either <code>Format.XHTML</code> or <code>Format.HTML</code>.
+   * @throws NotModifiedException if raised while reading the content type.
+   */
+  private Format getType(Content c) throws NotModifiedException {
+    MimeType t = c.getContentType();
+    // a null type cannot be matched against, so it goes straight to the guess below
+    if (t != null) {
+      if (RdfaContentHandler.XHTML_MIME.match(t)) return Format.XHTML;
+      if (RdfaContentHandler.HTML_MIME.match(t)) return Format.HTML;
+    }
+    logger.warn("Guessing HTML for unknown MIME type: " + t);
+    return Format.HTML;
+  }
+
+  /**
+   * Closes the input stream if it was opened. A failure to close is only logged,
+   * since there is nothing more to be done with the stream.
+   */
+  private void closeInput() {
+    if (inputStream == null) return;
+    try {
+      inputStream.close();
+    } catch (IOException e) {
+      if (logger.isDebugEnabled()) logger.debug("Unable to close RDFa input for " + content.getURIString(), e);
+    }
+  }
+}

Added: trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/package.html
===================================================================
--- trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/package.html	                        (rev 0)
+++ trunk/src/jar/content-rdfa/java/org/mulgara/content/rdfa/package.html	2010-08-20 02:41:38 UTC (rev 1973)
@@ -0,0 +1,13 @@
+<html>
+<head>
+<title>RDFa Content Handler</title>
+</head>
+<body>
+<h1>
+RDFa content handler.
+</h1>
+<p>
+This package is a service provider for parsing HTML/XHTML documents with embedded RDFa data. It uses the java-rdfa library by Damian Steer. See: <a href="http://github.com/shellac/java-rdfa">http://github.com/shellac/java-rdfa</a>
+</p>
+</body>
+</html>