[Mulgara-svn] r1787 - in trunk/src/jar: content-n3/java/org/mulgara/content/n3 querylang/java/org/mulgara/protocol

pag at mulgara.org pag at mulgara.org
Sat Sep 12 05:41:25 UTC 2009


Author: pag
Date: 2009-09-11 22:41:24 -0700 (Fri, 11 Sep 2009)
New Revision: 1787

Modified:
   trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java
   trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java
Log:
Added the option to set the charset encoding, with a default of UTF-8

Modified: trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java
===================================================================
--- trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java	2009-09-12 05:39:37 UTC (rev 1786)
+++ trunk/src/jar/content-n3/java/org/mulgara/content/n3/N3WriterUnitTest.java	2009-09-12 05:41:24 UTC (rev 1787)
@@ -26,6 +26,8 @@
 import java.io.StringWriter;
 import java.net.URI;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 // Java 2 enterprise packages
 import javax.activation.MimeType;
@@ -49,31 +51,93 @@
  */
 public class N3WriterUnitTest extends TestCase {
   /**
-   * Test {@link N3Writer} writing.
+   * Test {@link N3Writer} writing on a basic statement with a plain literal.
    *
    * @throws Exception if there's an error running the test (note that if the test
    *                   merely fails, this should <em>not</em> throw any exception
    */
-  public void test1() throws Exception {
-    // basic statement with plain literal
+  public void testBasic() throws Exception {
     runTest("<foo:bar> <foo:baz> \"42\" .\n", null);
-    // basic statement with blank-node and datatyped literal
+  }
+
+  /**
+   * Test {@link N3Writer} writing on a basic statement with blank-node and datatyped literal.
+   *
+   * @throws Exception if there's an error running the test (note that if the test
+   *                   merely fails, this should <em>not</em> throw any exception
+   */
+  public void testBasicBlankDataTyped() throws Exception {
     runTest("_:node1000001 <foo:baz> \"42\"^^<xsd:int> .\n", null);
-    // literal and uri with non-ascii characters
+  }
+
+  /**
+   * Test {@link N3Writer} writing on a statement with non-ascii characters.
+   *
+   * @throws Exception if there's an error running the test (note that if the test
+   *                   merely fails, this should <em>not</em> throw any exception
+   */
+  public void testNonAscii() throws Exception {
     runTest("<foo:i18n:øé> <foo:baz> \"Some text with \\\" in it, and 日本 chars, and \\u00E0 \" .",
             "<foo:i18n:%C3%B8%C3%A9> <foo:baz> \"Some text with \\\" in it, and \\u65E5\\u672C chars, and \\u00E0 \" .\n");
+  }
+
+
+  /**
+   * Test {@link N3Writer} writing with escaped characters.
+   *
+   * @throws Exception if there's an error running the test (note that if the test
+   *                   merely fails, this should <em>not</em> throw any exception
+   */
+  public void testEscapedChars() throws Exception {
     // literal with newlines, ff, etc
     runTest("<foo:bar> <foo:baz> \"Some text with \n, \r, \f, \u0000 in it\" .",
             "<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\u000C, \\u0000 in it\" .\n");
     runTest("<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\f, \\u0000 in it\" .",
             "<foo:bar> <foo:baz> \"Some text with \\n, \\r, \\u000C, \\u0000 in it\" .\n");
+  }
+
+  /**
+   * Test {@link N3Writer} writing statements with multiple blank nodes and a language tag.
+   *
+   * @throws Exception if there's an error running the test (note that if the test
+   *                   merely fails, this should <em>not</em> throw any exception
+   */
+  public void testMultiBlank() throws Exception {
     // multiple blank-nodes, and language-tag
-    runTest("_:bn1 <baz:baz> _:bn2 .\n" +
-            "_:bn2 <bar:bar> <foo:foo> .\n" +
-            "<foo:foo> <dc:title> \"hello\"@en .",
-            "_:node1000002 <baz:baz> _:node1000001 .\n" +
-            "_:node1000001 <bar:bar> <foo:foo> .\n" +
-            "<foo:foo> <dc:title> \"hello\"@en .\n");
+    String n3 = "_:bn1 <baz:baz> _:bn2 .\n" +
+                "_:bn2 <bar:bar> <foo:foo> .\n" +
+                "<foo:foo> <dc:title> \"hello\"@en .";
+    // Obtain a resolver session
+    ResolverSession resolverSession = new TestResolverSession();
+
+    // create the statements
+    Statements statements = new N3Statements(new StringContent(n3), resolverSession);
+
+    // test
+    StringWriter out = new StringWriter();
+    new N3Writer().write(statements, resolverSession, out);
+
+    // we don't know which blank node labels will be returned, so we just need to check that they are used consistently
+    Matcher m = Pattern.compile("(_:node[^ ]*) <baz:baz> (_:node[^ ]*)").matcher(out.toString());
+    assertTrue(m.find());
+    assertEquals(2, m.groupCount());
+    String bn1 = m.group(1);
+    String bn2 = m.group(2);
+    assertNotSame(bn1, bn2);
+
+    String exp = bn1 + " <baz:baz> " + bn2 + " .\n" +
+                 bn2 + " <bar:bar> <foo:foo> .\n" +
+                 "<foo:foo> <dc:title> \"hello\"@en .\n";    
+    assertEquals(exp != null ? exp : n3, out.toString());
+  }
+
+  /**
+   * Test {@link N3Writer} writing with multiple blank nodes using internal IDs.
+   *
+   * @throws Exception if there's an error running the test (note that if the test
+   *                   merely fails, this should <em>not</em> throw any exception
+   */
+  public void testInternalBlank() throws Exception {
     // multiple blank-nodes using internal-ids (numbers)
     runTest("_:42 <baz:baz> _:987 .\n" +
             "_:987 <bar:bar> <foo:foo> .\n",

Modified: trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java
===================================================================
--- trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java	2009-09-12 05:39:37 UTC (rev 1786)
+++ trunk/src/jar/querylang/java/org/mulgara/protocol/StreamedN3Answer.java	2009-09-12 05:41:24 UTC (rev 1787)
@@ -16,11 +16,13 @@
 
 package org.mulgara.protocol;
 
-import java.io.BufferedOutputStream;
+import java.io.OutputStreamWriter;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintWriter;
+import java.nio.charset.Charset;
 
+import org.apache.log4j.Logger;
 import org.jrdf.graph.BlankNode;
 import org.jrdf.graph.Literal;
 import org.jrdf.graph.TypedNodeVisitable;
@@ -42,23 +44,44 @@
  */
 public class StreamedN3Answer implements StreamedAnswer {
 
+  /** Logger. */
+  private final static Logger logger = Logger.getLogger(StreamedN3Answer.class);
+
   /** The answer to convert to RDF/XML. */
   private final GraphAnswer ans;
 
   /** The writer to send the data to. */
   private final PrintWriter p;
 
+  /** The name of the default charset encoding used when writing to the output stream. */
+  static final String UTF8 = "UTF-8";
+
   /**
    * Constructs the object and prepares to writing.
    * @param ans The answer to emit.
    * @param s The stream to write the answer to.
    */
   public StreamedN3Answer(Answer ans, OutputStream s) {
+    this(ans, s, UTF8);
+  }
+
+  /**
+   * Constructs the object and prepares for writing, using the named charset encoding.
+   * @param ans The answer to emit.
+   * @param s The stream to write the answer to.
+   */
+  public StreamedN3Answer(Answer ans, OutputStream s, String charsetName) {
     if (!(ans instanceof GraphAnswer)) throw new IllegalArgumentException("N3 constructor can only be constructed from a GraphAnswer");
     this.ans = (GraphAnswer)ans;
     assert ans.getVariables().length == 3;
-    BufferedOutputStream out = new BufferedOutputStream(s);
-    p = new PrintWriter(out);
+    Charset charset = null;
+    try {
+      charset = Charset.forName(charsetName);
+    } catch (Exception e) {
+      logger.error("Invalid charset. Using UTF-8: " + charsetName);
+      charset = Charset.forName(UTF8);
+    }
+    p = new PrintWriter(new OutputStreamWriter(s, charset));
   }
 
   /**




More information about the Mulgara-svn mailing list