[Mulgara-svn] r1531 - trunk/src/jar/query/java/org/mulgara/query/filter

pag at mulgara.org pag at mulgara.org
Fri Feb 20 00:08:07 UTC 2009


Author: pag
Date: 2009-02-19 16:08:06 -0800 (Thu, 19 Feb 2009)
New Revision: 1531

Modified:
   trunk/src/jar/query/java/org/mulgara/query/filter/RegexFn.java
   trunk/src/jar/query/java/org/mulgara/query/filter/RegexFnUnitTest.java
Log:
Updated to use XPath regular expressions instead of Java regular expressions

Modified: trunk/src/jar/query/java/org/mulgara/query/filter/RegexFn.java
===================================================================
--- trunk/src/jar/query/java/org/mulgara/query/filter/RegexFn.java	2009-02-19 16:49:37 UTC (rev 1530)
+++ trunk/src/jar/query/java/org/mulgara/query/filter/RegexFn.java	2009-02-20 00:08:06 UTC (rev 1531)
@@ -11,17 +11,13 @@
  */
 package org.mulgara.query.filter;
 
-import java.util.regex.Pattern;
-import static java.util.regex.Pattern.*;
-
+import org.apache.xerces.impl.xpath.regex.RegularExpression;
 import org.mulgara.query.QueryException;
 import org.mulgara.query.filter.value.ValueLiteral;
 
 
 /**
  * The regular expression test for values.
- * TODO: Move this on to Xalan Regex functions as these are fully compliant with SPARQL,
- * while the Java ones are not.
  *
  * @created Mar 8, 2008
  * @author Paul Gearon
@@ -33,18 +29,18 @@
   /** Generated Serialization ID for RMI */
   private static final long serialVersionUID = 6785353529347360357L;
 
-  /** a cache of the last pattern */ 
-  private Pattern pattern = null;
+  /** a cache of the last RegularExpression */
+  private RegularExpression re = null;
 
+  /** a cache of the last pattern string */
+  private String oldPattern = null;
+
   /** The expression that resolves flags */
   private ValueLiteral flagExpression = null;
 
   /** a cache of the last flag string */
-  private String flagsStr = null;
+  private String oldFlags = null;
 
-  /** a cache of the last flags */
-  private int flags = 0;
-
   /**
    * Creates an equality test operation with default flags
    * @param lhs The first term to compare
@@ -67,10 +63,32 @@
 
   /** @see org.mulgara.query.filter.BinaryTestFilter#testCmp() */
   boolean testCmp() throws QueryException {
-    return pattern().matcher(str()).matches();
+    return regex().matches(str());
   }
 
   /**
+   * Gets the regular expression to use for the current variable bindings.
+   * This will build a new RegularExpression if the pattern or the flags have changed for the current variable bindings.
+   * @return A RegularExpression using the existing object if there was no update.
+   * @throws QueryException If the pattern string or flags string cannot be resolved.
+   */
+  private RegularExpression regex() throws QueryException {
+    String patternStr = pattern();
+    String flagsStr = flags();
+    if (re == null) {
+      re = new RegularExpression(patternStr, flagsStr);
+      oldPattern = patternStr;
+      oldFlags = flagsStr;
+    } else if (!patternStr.equals(oldPattern) || notEquals(flagsStr, oldFlags)) {
+      // build a new object instead of calling re.setPattern(patternStr, flagsStr), which has a Xerces bug
+      re = new RegularExpression(patternStr, flagsStr);
+      oldPattern = patternStr;
+      oldFlags = flagsStr;
+    }
+    return re;
+  }
+
+  /**
    * Gets the string to be matched in this regular expression.
    * @return The string to be matched against.
    * @throws QueryException If the expression for the string cannot be resolved.
@@ -81,46 +99,35 @@
   }
 
   /**
-   * Gets the Pattern to use for the current variable bindings. This will calculate a new pattern
-   * and flags if either change for the current variable bindings.
-   * @return A Pattern for regex matching, using the existing pattern if there was no update.
-   * @throws QueryException If the pattern string or flags string cannot be resolved.
+   * Gets the pattern to use for this regex call.
+   * @return The pattern to use.
+   * @throws QueryException The expression for the pattern cannot be resolved.
    */
-  private Pattern pattern() throws QueryException {
+  private String pattern() throws QueryException {
     if (!rhs.isLiteral() || !((ValueLiteral)rhs).isSimple()) throw new QueryException("Type Error: Invalid pattern type in regular expression. Need string, got: " + rhs.getClass().getSimpleName());
-    String patternStr = ((ValueLiteral)rhs).getLexical();
-    int oldFlags = flags;
-    // note that the call to flags has a side-effect
-    if (oldFlags != flags() || pattern == null || !patternStr.equals(pattern.pattern())) {
-      pattern = Pattern.compile(patternStr, flags);
-    }
-    return pattern;
+    return ((ValueLiteral)rhs).getLexical();
   }
 
-  /** Characters used for regex flags */
-  private static final String optionChars = "smix";
-  /** Regex flags that correspond to the optionChars */
-  private static final int[] optionFlags = new int[] { DOTALL, MULTILINE, CASE_INSENSITIVE, COMMENTS };
-
   /**
-   * Gets the flags to use for this regex call. This will calculate new flags is the expression
-   * the flags come from is updated.
-   * @return An int with the flags for the current binding. Returns 0 if no flags are to be used.
+   * Gets the flags to use for this regex call.
+   * @return The flags to use, or <code>null</code> if none was provided.
    * @throws QueryException The expression the flags are built on cannot be resolved.
    */
-  private int flags() throws QueryException {
-    if (flagExpression == null) return 0;
-    if (!flagExpression.isLiteral() || !((ValueLiteral)flagExpression).isSimple()) throw new QueryException("Type Error: Invalid flags in regular expression. Need string, got: " + rhs.getClass().getSimpleName());
-    String currentFlagStr = flagExpression.getLexical();
-    if (flagsStr == null || !flagsStr.equals(currentFlagStr)) {
-      flagsStr = currentFlagStr;
-      // calculate the new flags
-      flags = 0;
-      for (int i = 0; i < optionChars.length(); i++) {
-        if (flagsStr.indexOf(optionChars.charAt(i)) != -1) flags |= optionFlags[i];
-      }
-    }
-    return flags;
+  private String flags() throws QueryException {
+    if (flagExpression == null) return null;
+    if (!flagExpression.isLiteral() || !((ValueLiteral)flagExpression).isSimple()) throw new QueryException("Type Error: Invalid flags in regular expression. Need string, got: " + flagExpression.getClass().getSimpleName());
+    return flagExpression.getLexical();
   }
-  
+
+  /**
+   * Compares two strings that may be null for inequality.
+   * @param a The first string.
+   * @param b The second string.
+   * @return <code>false</code> if the strings represent the same value, or are both null,
+   *         <code>true</code> otherwise.
+   */
+  private static boolean notEquals(String a, String b) {
+    if (a == null) return b != null;
+    return b == null || !a.equals(b);
+  }
 }

Modified: trunk/src/jar/query/java/org/mulgara/query/filter/RegexFnUnitTest.java
===================================================================
--- trunk/src/jar/query/java/org/mulgara/query/filter/RegexFnUnitTest.java	2009-02-19 16:49:37 UTC (rev 1530)
+++ trunk/src/jar/query/java/org/mulgara/query/filter/RegexFnUnitTest.java	2009-02-20 00:08:06 UTC (rev 1531)
@@ -66,13 +66,13 @@
   public void testLiteral() throws Exception {
     SimpleLiteral str = new SimpleLiteral("a foolish test");
     SimpleLiteral diffStr = new SimpleLiteral("another test");
-    SimpleLiteral pattern = new SimpleLiteral(".*foo.*");
-    SimpleLiteral patternCaps = new SimpleLiteral(".*FOO.*");
-    SimpleLiteral pattern2 = new SimpleLiteral(".*foo.*test.*");
+    SimpleLiteral pattern = new SimpleLiteral("foo");
+    SimpleLiteral patternCaps = new SimpleLiteral("FOO");
+    SimpleLiteral pattern2 = new SimpleLiteral("foo.*test");
     SimpleLiteral noTest = new SimpleLiteral("fred");
     SimpleLiteral caseFlag = new SimpleLiteral("i");
     ValueLiteral typed = TypedLiteral.newLiteral("a foolish test");
-    ValueLiteral typedPattern = TypedLiteral.newLiteral(".*foo.*");
+    ValueLiteral typedPattern = TypedLiteral.newLiteral("foo");
 
     RegexFn fn = new RegexFn(str, pattern);
     assertTrue(t.equals(fn));
@@ -121,12 +121,12 @@
 
     Literal str = new LiteralImpl("a foolish test");
     Literal diffStr = new LiteralImpl("another test");
-    Literal pattern = new LiteralImpl(".*foo.*");
-    Literal patternCaps = new LiteralImpl(".*FOO.*");
-    Literal pattern2 = new LiteralImpl(".*foo.*test.*");
+    Literal pattern = new LiteralImpl("foo");
+    Literal patternCaps = new LiteralImpl("FOO");
+    Literal pattern2 = new LiteralImpl("foo.*test");
     Literal noTest = new LiteralImpl("fred");
     Literal typed = new LiteralImpl("a foolish test", SimpleLiteral.STRING_TYPE.getValue());
-    Literal typedPattern = new LiteralImpl(".*foo.*", SimpleLiteral.STRING_TYPE.getValue());
+    Literal typedPattern = new LiteralImpl("foo", SimpleLiteral.STRING_TYPE.getValue());
     URIReferenceImpl xsdString = new URIReferenceImpl(SimpleLiteral.STRING_TYPE.getValue());
     BlankNodeImpl bn = new BlankNodeImpl(101);
     Node[][] rows = {




More information about the Mulgara-svn mailing list