[Mulgara-svn] r1787 - in trunk/src/jar: content-n3/java/org/mulgara/content/n3 querylang/java/org/mulgara/protocol
pag at mulgara.org
pag at mulgara.org
Sat Sep 12 05:41:25 UTC 2009
Author: pag
Date: 2009-09-11 22:41:24 -0700 (Fri, 11 Sep 2009)
New Revision: 1787
Modified:
trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java
trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java
Log:
Added the option to set the charset encoding, with a default of UTF-8
Modified: trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java
===================================================================
--- trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java 2009-09-12 05:39:37 UTC (rev 1786)
+++ trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java 2009-09-12 05:41:24 UTC (rev 1787)
@@ -26,6 +26,8 @@
import java.io.StringWriter;
import java.net.URI;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
// Java 2 enterprise packages
import javax.activation.MimeType;
@@ -49,31 +51,93 @@
*/
public class N3WriterUnitTest extends TestCase {
/**
- * Test {@link N3Writer} writing.
+ * Test {@link N3Writer} writing on a basic statement with a plain literal.
*
* @throws Exception if there's an error running the test (note that if the test
* merely fails, this should <em>not</em> throw any exception
*/
- public void test1() throws Exception {
- // basic statement with plain literal
+ public void testBasic() throws Exception {
runTest("<foo:bar> <foo:baz> \"42\" .\n", null);
- // basic statement with blank-node and datatyped literal
+ }
+
+ /**
+ * Test {@link N3Writer} writing on a basic statement with blank-node and datatyped literal.
+ *
+ * @throws Exception if there's an error running the test (note that if the test
+ * merely fails, this should <em>not</em> throw any exception
+ */
+ public void testBasicBlankDataTyped() throws Exception {
runTest("_:node1000001 <foo:baz> \"42\"^^<xsd:int> .\n", null);
- // literal and uri with non-ascii characters
+ }
+
+ /**
+ * Test {@link N3Writer} writing on a statement with non-ascii characters.
+ *
+ * @throws Exception if there's an error running the test (note that if the test
+ * merely fails, this should <em>not</em> throw any exception
+ */
+ public void testNonAscii() throws Exception {
runTest("<foo:i18n:øé> <foo:baz> \"Some text with \\\" in it, and 日本 chars, and \\u00E0 \" .",
"<foo:i18n:%C3%B8%C3%A9> <foo:baz> \"Some text with \\\" in it, and \\u65E5\\u672C chars, and \\u00E0 \" .\n");
+ }
+
+
+ /**
+ * Test {@link N3Writer} writing with escaped characters.
+ *
+ * @throws Exception if there's an error running the test (note that if the test
+ * merely fails, this should <em>not</em> throw any exception
+ */
+ public void testEscapedChars() throws Exception {
// literal with newlines, ff, etc
runTest("<foo:bar> <foo:baz> \"Some text with \n, \r, \f, \u0000 in it\" .",
"<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\u000C, \\u0000 in it\" .\n");
runTest("<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\f, \\u0000 in it\" .",
"<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\u000C, \\u0000 in it\" .\n");
+ }
+
+ /**
+ * Test {@link N3Writer} writing statements with multiple blank nodes and a language tag.
+ *
+ * @throws Exception if there's an error running the test (note that if the test
+ * merely fails, this should <em>not</em> throw any exception
+ */
+ public void testMultiBlank() throws Exception {
// multiple blank-nodes, and language-tag
- runTest("_:bn1 <baz:baz> _:bn2 .\n" +
- "_:bn2 <bar:bar> <foo:foo> .\n" +
- "<foo:foo> <dc:title> \"hello\"@en .",
- "_:node1000002 <baz:baz> _:node1000001 .\n" +
- "_:node1000001 <bar:bar> <foo:foo> .\n" +
- "<foo:foo> <dc:title> \"hello\"@en .\n");
+ String n3 = "_:bn1 <baz:baz> _:bn2 .\n" +
+ "_:bn2 <bar:bar> <foo:foo> .\n" +
+ "<foo:foo> <dc:title> \"hello\"@en .";
+ // Obtain a resolver session
+ ResolverSession resolverSession = new TestResolverSession();
+
+ // create the statements
+ Statements statements = new N3Statements(new StringContent(n3), resolverSession);
+
+ // test
+ StringWriter out = new StringWriter();
+ new N3Writer().write(statements, resolverSession, out);
+
+ // don't know what blank nodes will be returned, so we just need to check that they are used consistently
+ Matcher m = Pattern.compile("(_:node[^ ]*) <baz:baz> (_:node[^ ]*)").matcher(out.toString());
+ assertTrue(m.find());
+ assertEquals(2, m.groupCount());
+ String bn1 = m.group(1);
+ String bn2 = m.group(2);
+ assertNotSame(bn1, bn2);
+
+ String exp = bn1 + " <baz:baz> " + bn2 + " .\n" +
+ bn2 + " <bar:bar> <foo:foo> .\n" +
+ "<foo:foo> <dc:title> \"hello\"@en .\n";
+ assertEquals(exp != null ? exp : n3, out.toString());
+ }
+
+ /**
+ * Test {@link N3Writer} writing with multiple blank nodes using internal IDs.
+ *
+ * @throws Exception if there's an error running the test (note that if the test
+ * merely fails, this should <em>not</em> throw any exception
+ */
+ public void testInternalBlank() throws Exception {
// multiple blank-nodes using internal-ids (numbers)
runTest("_:42 <baz:baz> _:987 .\n" +
"_:987 <bar:bar> <foo:foo> .\n",
Modified: trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java
===================================================================
--- trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java 2009-09-12 05:39:37 UTC (rev 1786)
+++ trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java 2009-09-12 05:41:24 UTC (rev 1787)
@@ -16,11 +16,13 @@
package org.mulgara.protocol;
-import java.io.BufferedOutputStream;
+import java.io.OutputStreamWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
+import java.nio.charset.Charset;
+import org.apache.log4j.Logger;
import org.jrdf.graph.BlankNode;
import org.jrdf.graph.Literal;
import org.jrdf.graph.TypedNodeVisitable;
@@ -42,23 +44,44 @@
*/
public class StreamedN3Answer implements StreamedAnswer {
+ /** Logger. */
+ private final static Logger logger = Logger.getLogger(StreamedN3Answer.class);
+
/** The answer to convert to RDF/XML. */
private final GraphAnswer ans;
/** The writer to send the data to. */
private final PrintWriter p;
+ /** The name of the default charset encoding, used when no charset is given or the requested one is invalid. */
+ static final String UTF8 = "UTF-8";
+
/**
* Constructs the object and prepares to writing.
* @param ans The answer to emit.
* @param s The stream to write the answer to.
*/
public StreamedN3Answer(Answer ans, OutputStream s) {
+ this(ans, s, UTF8);
+ }
+
+ /**
+ * Constructs the object and prepares for writing, encoding output with the charset named by charsetName (UTF-8 if that name is invalid).
+ * @param ans The answer to emit.
+ * @param s The stream to write the answer to.
+ */
+ public StreamedN3Answer(Answer ans, OutputStream s, String charsetName) {
if (!(ans instanceof GraphAnswer)) throw new IllegalArgumentException("N3 constructor can only be constructed from a GraphAnswer");
this.ans = (GraphAnswer)ans;
assert ans.getVariables().length == 3;
- BufferedOutputStream out = new BufferedOutputStream(s);
- p = new PrintWriter(out);
+ Charset charset = null;
+ try {
+ charset = Charset.forName(charsetName);
+ } catch (Exception e) {
+ logger.error("Invalid charset. Using UTF-8: " + charsetName);
+ charset = Charset.forName(UTF8);
+ }
+ p = new PrintWriter(new OutputStreamWriter(s, charset));
}
/**
More information about the Mulgara-svn
mailing list