[Mulgara-svn] r942 - in trunk/src/jar: resolver-relational/java/org/mulgara/resolver/relational store-stringpool/java/org/mulgara/store/stringpool/xa

pag at mulgara.org pag at mulgara.org
Thu May 15 06:41:14 UTC 2008


Author: pag
Date: 2008-05-14 23:41:13 -0700 (Wed, 14 May 2008)
New Revision: 942

Modified:
   trunk/src/jar/resolver-relational/java/org/mulgara/resolver/relational/RelationalResolverUnitTest.java
   trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPObjectFactoryImpl.java
   trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPStringImpl.java
Log:
Added in language codes for untyped literals

Modified: trunk/src/jar/resolver-relational/java/org/mulgara/resolver/relational/RelationalResolverUnitTest.java
===================================================================
--- trunk/src/jar/resolver-relational/java/org/mulgara/resolver/relational/RelationalResolverUnitTest.java	2008-05-15 06:40:37 UTC (rev 941)
+++ trunk/src/jar/resolver-relational/java/org/mulgara/resolver/relational/RelationalResolverUnitTest.java	2008-05-15 06:41:13 UTC (rev 942)
@@ -974,13 +974,13 @@
       answer.beforeFirst();
       assertTrue(answer.next());
       assertEquals(new URIReferenceImpl(new URI("http://www.conference.org/conf02004/paper#Paper1")), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends", "en"), answer.getObject(1));
       assertTrue(answer.next());
       assertEquals(new URIReferenceImpl(new URI("http://www.conference.org/conf02004/paper#Paper2")), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too", "en"), answer.getObject(1));
       assertTrue(answer.next());
       assertEquals(new URIReferenceImpl(new URI("http://www.conference.org/conf02004/paper#Paper3")), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?", "en"), answer.getObject(1));
       assertFalse(answer.next());
       answer.close();
         
@@ -1062,7 +1062,7 @@
 
       answer.beforeFirst();
       assertTrue(answer.next());
-      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?"), answer.getObject(0));
+      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?", "en"), answer.getObject(0));
       assertFalse(answer.next());
       answer.close();
         
@@ -1187,15 +1187,15 @@
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("Apes and Bears"), answer.getObject(0));
       assertEquals(new LiteralImpl("Do we like Bears?"), answer.getObject(1));
-      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too"), answer.getObject(2));
+      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too", "en"), answer.getObject(2));
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("Apes and Bears"), answer.getObject(0));
       assertEquals(new LiteralImpl("We like Apes"), answer.getObject(1));
-      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends"), answer.getObject(2));
+      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends", "en"), answer.getObject(2));
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("Cats and Donkeys"), answer.getObject(0));
       assertEquals(new LiteralImpl("I prefer Donkeys"), answer.getObject(1));
-      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?"), answer.getObject(2));
+      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?", "en"), answer.getObject(2));
       assertFalse(answer.next());
       answer.close();
         
@@ -1244,13 +1244,13 @@
       answer.beforeFirst();
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("Do we like Bears?"), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too", "en"), answer.getObject(1));
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("I prefer Donkeys"), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?", "en"), answer.getObject(1));
       assertTrue(answer.next());
       assertEquals(new LiteralImpl("We like Apes"), answer.getObject(0));
-      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends"), answer.getObject(1));
+      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends", "en"), answer.getObject(1));
       assertFalse(answer.next());
       answer.close();
         
@@ -1303,16 +1303,16 @@
 
       answer.beforeFirst();
       assertTrue(answer.next());
-      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends"), answer.getObject(0));
+      assertEquals(new LiteralImpl("Titel of the Paper: Apes and their Friends", "en"), answer.getObject(0));
       assertEquals(new LiteralImpl("Smith"), answer.getObject(1));
       assertTrue(answer.next());
-      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too"), answer.getObject(0));
+      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too", "en"), answer.getObject(0));
       assertEquals(new LiteralImpl("Carson"), answer.getObject(1));
       assertTrue(answer.next());
-      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too"), answer.getObject(0));
+      assertEquals(new LiteralImpl("Titel of the Paper: Bears like us too", "en"), answer.getObject(0));
       assertEquals(new LiteralImpl("Smith"), answer.getObject(1));
       assertTrue(answer.next());
-      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?"), answer.getObject(0));
+      assertEquals(new LiteralImpl("Titel of the Paper: Why Cats?", "en"), answer.getObject(0));
       assertEquals(new LiteralImpl("Carson"), answer.getObject(1));
       assertFalse(answer.next());
       answer.close();

Modified: trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPObjectFactoryImpl.java
===================================================================
--- trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPObjectFactoryImpl.java	2008-05-15 06:40:37 UTC (rev 941)
+++ trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPObjectFactoryImpl.java	2008-05-15 06:41:13 UTC (rev 942)
@@ -29,7 +29,6 @@
 // Java 2 standard packages
 import java.nio.ByteBuffer;
 import java.net.URI;
-import java.util.Date;
 
 // JRDF
 import org.jrdf.graph.Literal;
@@ -89,20 +88,22 @@
         );
       }
 
-      // This must be a typed literal.
+      // This is either a typed literal, or it is untyped with a language code.
+      // Check for the start of a type URI
       int index = encodedString.lastIndexOf("\"^^<");
       if (index == -1) {
-        throw new IllegalArgumentException(
-            "Could not parse encoded string (String?): \"" + encodedString +
-            "\""
+        // must be a language-coded untyped literal
+        index = encodedString.lastIndexOf("\"@");
+        if (index == -1) throw new IllegalArgumentException("Could not parse encoded string (String?): \"" + encodedString + "\"");
+        String lang = encodedString.substring(index + 2);  // 2 for the quote and @ characters
+        return SPStringImpl.newSPObject(
+            AbstractSPObject.unescapeString(encodedString.substring(1, index)), lang
         );
       }
 
+      // must be a typed literal
       if (encodedString.charAt(len - 1) != '>') {
-        throw new IllegalArgumentException(
-            "Bad encodedString format (Typed literal?): \"" + encodedString +
-            "\""
-        );
+        throw new IllegalArgumentException("Bad encodedString format (Typed literal?): \"" + encodedString + "\"");
       }
 
       return newSPTypedLiteral(
@@ -197,16 +198,6 @@
     if (rdfNode instanceof Literal) {
       Literal literal = (Literal)rdfNode;
 
-      // TODO language codes are currently ignored.
-      /*
-      if (literal.getLanguage() != null) {
-        throw new IllegalArgumentException(
-          "Language codes are not yet supported: \"" + literal.getLanguage() +
-          "\" (" + literal + ")"
-        );
-      }
-      */
-
       URI typeURI = literal.getDatatypeURI();
       String lexicalForm = literal.getLexicalForm();
 
@@ -216,7 +207,7 @@
       }
 
       // Create an SPObject representing an untyped literal.
-      return SPStringImpl.newSPObject(lexicalForm);
+      return SPStringImpl.newSPObject(lexicalForm, literal.getLanguage());
     }
 
     if (rdfNode instanceof URIReference) {
@@ -224,8 +215,7 @@
     }
 
     throw new IllegalArgumentException(
-        "Unsupported jrdf node type: " + rdfNode + " (" + rdfNode.getClass() +
-        ")"
+        "Unsupported jrdf node type: " + rdfNode + " (" + rdfNode.getClass() + ")"
     );
   }
 

Modified: trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPStringImpl.java
===================================================================
--- trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPStringImpl.java	2008-05-15 06:40:37 UTC (rev 941)
+++ trunk/src/jar/store-stringpool/java/org/mulgara/store/stringpool/xa/SPStringImpl.java	2008-05-15 06:41:13 UTC (rev 942)
@@ -60,22 +60,40 @@
  */
 public final class SPStringImpl extends AbstractSPObject implements SPString {
 
+  @SuppressWarnings("unused")
   private final static Logger logger = Logger.getLogger(SPStringImpl.class);
 
+  private static final int LANG_SEPARATOR = '\0';
+
   private String str;
 
+  private String lang;
 
   SPStringImpl(String str) {
-    if (str == null) {
-      throw new IllegalArgumentException("Null \"str\" parameter");
-    }
+    if (str == null) throw new IllegalArgumentException("Null \"str\" parameter");
+    this.str = str;
+    lang = "";
+  }
 
+
+  SPStringImpl(String str, String lang) {
+    if (str == null) throw new IllegalArgumentException("Null \"str\" parameter");
+    if (lang == null) lang = "";
     this.str = str;
+    this.lang = lang;
   }
 
 
   SPStringImpl(ByteBuffer data) {
-    this.str = CHARSET.decode(data).toString();
+    String fullStr = CHARSET.decode(data).toString();
+    int sep = fullStr.indexOf(LANG_SEPARATOR);
+    if (sep < 0) {
+      str = fullStr;
+      lang = "";
+    } else {
+      lang = fullStr.substring(0, sep);
+      str = fullStr.substring(sep + 1);
+    }
   }
 
 
@@ -84,11 +102,24 @@
   }
 
 
+  static SPObject newSPObject(String str, String language) {
+    if (language == null || language.length() == 0) return new SPStringImpl(str);
+    if (language.charAt(0) == '@') language = language.substring(1);
+    if (!checkLangChars(language)) throw new IllegalArgumentException("Invalid language code characters: " + language);
+    return new SPStringImpl(str, language);
+  }
+
+
   public String getLexicalForm() {
     return str;
   }
 
 
+  public String getLanguageCode() {
+    return lang;
+  }
+
+
   /* from SPObject interface. */
 
   public TypeCategory getTypeCategory() {
@@ -97,7 +128,9 @@
 
 
   public ByteBuffer getData() {
-    return CHARSET.encode(str);
+    StringBuilder sb = new StringBuilder(lang);
+    sb.appendCodePoint(LANG_SEPARATOR).append(str);
+    return CHARSET.encode(sb.toString());
   }
 
 
@@ -110,11 +143,14 @@
     StringBuffer sb = new StringBuffer(str.length() + 8);
     sb.append(str);
     escapeString(sb);
-    return sb.insert(0, '"').append('"').toString();
+    sb.insert(0, '"').append('"');
+    if (lang.length() > 0) sb.append("@").append(lang);
+    return sb.toString();
   }
 
 
   public org.jrdf.graph.Node getRDFNode() {
+    if (lang.length() > 0) return new LiteralImpl(str, lang);
     return new LiteralImpl(str);
   }
 
@@ -126,6 +162,10 @@
     int c = super.compareTo(o);
     if (c != 0) return c;
 
+    // Compare the language Strings.
+    c = lang.compareToIgnoreCase(((SPStringImpl)o).lang);
+    if (c != 0) return c;
+    
     // Compare the Strings.
     return str.compareToIgnoreCase(((SPStringImpl)o).str);
   }
@@ -134,21 +174,70 @@
   /* from Object. */
 
   public int hashCode() {
-    return str.hashCode();
+    return str.hashCode() + lang.hashCode() * 13;
   }
 
 
   public boolean equals(Object obj) {
     // Check for null.
     if (obj == null) return false;
+    // short circuit if it is the same object
+    if (this == obj) return true;
+    // check for type
+    if (!(obj instanceof SPStringImpl)) return false;
 
-    try {
-      return str.equals(((SPStringImpl)obj).str);
-    } catch (ClassCastException ex) {
-      // obj was not an SPStringImpl.
-      return false;
+    SPStringImpl other = (SPStringImpl)obj;
+    return str.equals(other.str) && lang.equals(other.lang);
+  }
+
+  /**
+   * Test if a language code is valid. Language codes are guaranteed to be ASCII
+   * and must meet the pattern: [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
+   * Also permits empty language tags to pass.
+   * @param lang The language string to test.
+   * @return <code>true</code> if the string passes the test, or <code>false</code> if it doesn't.
+   */
+  public static boolean checkLangChars(String lang) {
+    // a flag to indicate when the first '-' character has been passed
+    boolean extension = false;
+    // check each character in the string
+    for (int i = 0; i < lang.length(); i++) {
+      char current = lang.charAt(i);
+      // check for starting an extension
+      if (current == '-') {
+        // a '-' may not lead the tag, and must have more characters after it
+        if (i == 0 || ++i == lang.length()) return false;
+        extension = true;
+        // the very next character can only be a letter or digit, and not a '-'
+        if (!isAsciiLetterDigit(lang.charAt(i))) return false;
+        continue;
+      }
+      // before the first '-' character, only letters are accepted.
+      if (extension) {
+        if (!isAsciiLetterDigit(current)) return false;
+      } else {
+        if (!isAsciiLetter(current)) return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Check if a character is an ASCII letter or digit. Range bounds are inclusive.
+   * @param c The character to test.
+   * @return <code>true</code> if the character meets the pattern [a-zA-Z0-9]
+   */
+  private static boolean isAsciiLetterDigit(char c) {
+    return isAsciiLetter(c) || (c >= '0' && c <= '9');
+  }
+
+  /**
+   * Check if a character is an ASCII letter. Range bounds are inclusive.
+   * @param c The character to test.
+   * @return <code>true</code> if the character meets the pattern [a-zA-Z]
+   */
+  private static boolean isAsciiLetter(char c) {
+    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+  }
 }
 




More information about the Mulgara-svn mailing list