[Mulgara-svn] r743 - branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples

pag at mulgara.org pag at mulgara.org
Sat Apr 5 04:50:46 UTC 2008


Author: pag
Date: 2008-04-04 21:50:45 -0700 (Fri, 04 Apr 2008)
New Revision: 743

Added:
   branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
   branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java
Modified:
   branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
Log:
Added method and classes to support OPTIONAL joins from SPARQL

Added: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java	                        (rev 0)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java	2008-04-05 04:50:45 UTC (rev 743)
@@ -0,0 +1,280 @@
+/*
+ * The contents of this file are subject to the Open Software License
+ * Version 3.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.opensource.org/licenses/osl-3.0.txt
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ */
+
+package org.mulgara.store.tuples;
+
+// Java 2 standard packages
+import java.math.BigInteger;
+import java.util.*;
+
+// Third party packages
+import org.apache.log4j.*;
+
+// Locally written packages
+import org.mulgara.query.Constraint;
+import org.mulgara.query.TuplesException;
+import org.mulgara.query.Variable;
+import org.mulgara.store.tuples.AbstractTuples;
+
+/**
+ * Left Join operation.
+ *
+ * This operation is similar to a subtraction, only it returns every row from the LHS,
+ * while returning rows from the RHS when available, and <code>null</code> otherwise.
+ *
+ * The join is performed by iterating over the lhs, and searching on the
+ * RHS for matching rows.  For efficient searching, the RHS must be
+ * ordered according to the matching variables.  This class is not responsible for
+ * ensuring the sort order of the RHS; that responsibility falls to
+ * {@link TuplesOperations#optionalJoin(Tuples, Tuples)}.
+ *
+ * @created 2008-04-04
+ * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @copyright &copy; 2005 <A href="mailto:pgearon at users.sourceforge.net">Paul Gearon</A>
+ * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ */
+public class LeftJoin extends AbstractTuples {
+
+  @SuppressWarnings("unused")
+  private static Logger logger = Logger.getLogger(LeftJoin.class.getName());
+
+  /** The set of tuples to return all row from. */
+  protected Tuples lhs;
+
+  /** The set of tuples to add to the lhs. */
+  protected Tuples rhs;
+
+  /** The set of variables common to both the lhs and the rhs. */
+  protected Set<Variable> commonVars;
+
+  /** An array of the matching variables' columns within the lhs, indexed by the rhs position. */
+  protected int[] varMap;
+
+  /** Indicates if the RHS is currently sitting on a match. */
+  private boolean rightMatches = false;
+  
+  /** A collection of the variables on the LHS */
+  private ArrayList<Variable> lhsVars;
+
+  /** The offset for indexing into the RHS, while avoiding LHS variables */
+  private int rhsOffset;
+
+  /**
+   * Configure a subtraction operation for lazy evaluation.
+   *
+   * @param lhs The original tuples, including the rows to be removed.
+   * @param rhs The tuples defining the rows to be removed from the lhs.
+   * @throws IllegalArgumentException If the <var>lhs</var> and  <var>rhs</var>
+   *         contain no variables in common.
+   */
+  @SuppressWarnings("unchecked")
+  LeftJoin(Tuples lhs, Tuples rhs) throws TuplesException, IllegalArgumentException {
+    // store the operands
+    this.lhs = (Tuples)lhs.clone();
+    this.rhs = (Tuples)rhs.clone();
+
+    // get the variables to subtract with. TODO: get the correct type back from getMatchingVars
+    commonVars = Collections.unmodifiableSet((Set<Variable>)TuplesOperations.getMatchingVars(lhs, rhs));
+
+    if (commonVars.isEmpty()) logger.warn("Tuples should have variables in common for optional join to occur");
+
+    // initialise the mapping of lhs columns to rhs columns
+    varMap = new int[commonVars.size()];
+    // iterate over the variables to do the mapping
+    for (Variable var: commonVars) {
+      // get the index of the variable in the rhs
+      int si = rhs.getColumnIndex(var);
+      // check that it is within the prefix columns. If not, then the rhs is not properly sorted.
+      if (si >= varMap.length) {
+        String op = "common= " + commonVars.toString();
+        op += "; var= " + var + "; index in sub= " + si +"; rhs= [ ";
+        Variable[] v = rhs.getVariables();
+        for (int k = 0; k < v.length; k++) {
+          op += v[k] + " ";
+        }
+        op += "]";
+        // usually this would be an assertion, but it's too important to miss
+        throw new IllegalArgumentException("Subtracted tuples not sorted correctly: " + op);
+      }
+      // map the rhs index of the variable to the lhs index
+      varMap[si] = lhs.getColumnIndex(var);
+    }
+
+    // set the variables for this optional conjunction
+    lhsVars = new ArrayList<Variable>(Arrays.asList(lhs.getVariables()));
+    ArrayList<Variable> vars = (ArrayList<Variable>)lhsVars.clone();
+    ArrayList<Variable> rhsVars = new ArrayList<Variable>(Arrays.asList(rhs.getVariables()));
+    rhsVars.removeAll(commonVars);
+    vars.addAll(rhsVars);
+    setVariables(vars);
+    
+    // set the column offset for indexing into the RHS
+    rhsOffset = lhsVars.size() - varMap.length;
+    assert rhsOffset >= 0;
+  }
+
+
+  //
+  // Methods implementing Tuples
+  //
+
+  /** {@inheritDoc} */
+  public long getColumnValue(int column) throws TuplesException {
+    int nrLeftVars = lhs.getNumberOfVariables();
+    if (column < nrLeftVars) return lhs.getColumnValue(column);
+    // return the column minus the LHS columns, and then skip over the matching vars
+    return rhs.getColumnValue(column - rhsOffset);
+  }
+
+
+  /** {@inheritDoc} */
+  public long getRowUpperBound() throws TuplesException {
+    BigInteger rowCount = BigInteger.valueOf(lhs.getRowUpperBound());
+    rowCount = rowCount.multiply(BigInteger.valueOf(rhs.getRowUpperBound()));
+    return rowCount.longValue();
+  }
+
+
+  /** {@inheritDoc}  Relies on the lhs of the optional. */
+  public boolean isColumnEverUnbound(int column) throws TuplesException {
+    int nrLeftVars = lhs.getNumberOfVariables();
+    if (column < nrLeftVars) return lhs.isColumnEverUnbound(column);
+    // return the column minus the LHS columns, and then skip over the matching vars
+    return rhs.isColumnEverUnbound(column - rhsOffset);
+  }
+
+
+  /** {@inheritDoc} */
+  public int getColumnIndex(Variable variable) throws TuplesException {
+    if (lhsVars.contains(variable)) return lhs.getColumnIndex(variable);
+    return rhs.getColumnIndex(variable) + rhsOffset;
+  }
+
+
+  /**
+   * {@inheritDoc}
+   * @return Always <code>false</code>.
+   */
+  public boolean isMaterialized() {
+    return false;
+  }
+
+
+  /** {@inheritDoc} */
+  public boolean hasNoDuplicates() throws TuplesException {
+    return lhs.hasNoDuplicates();
+  }
+
+
+  /** {@inheritDoc} */
+  public RowComparator getComparator() {
+    return lhs.getComparator();
+  }
+
+
+  /** {@inheritDoc} */
+  public List<Tuples> getOperands() {
+    return Collections.unmodifiableList(Arrays.asList(new Tuples[] {lhs, rhs}));
+  }
+
+
+  /** {@inheritDoc} */
+  public boolean isUnconstrained() throws TuplesException {
+    return lhs.isUnconstrained();
+  }
+
+
+  /** {@inheritDoc} */
+  public void renameVariables(Constraint constraint) {
+    lhs.renameVariables(constraint);
+    rhs.renameVariables(constraint);
+  }
+
+
+  /** {@inheritDoc} */
+  public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
+    int nrVars = lhs.getNumberOfVariables();
+    int tailLen = prefix.length - nrVars;
+    if (tailLen <= 0) {
+      // search on the LHS
+      lhs.beforeFirst(prefix, suffixTruncation);
+      matchRight();
+    } else {
+      // search on the LHS, and do the remainder of the search of the RHS
+      lhs.beforeFirst(prefix, nrVars);
+      matchRight();
+      // if there is data on the right, then search in it
+      if (rightMatches) {
+        long[] tailPrefix = new long[tailLen];
+        for (int i = 0; i < tailLen; i++) tailPrefix[i] = prefix[i + nrVars];
+        rhs.beforeFirst(tailPrefix, 0);
+      }
+    }
+  }
+
+  /** {@inheritDoc} */
+  public boolean next() throws TuplesException {
+    // check if we are currently in a match
+    if (rightMatches) {
+      // if there is more, then return
+      if (rhs.next()) return true;
+      // no more, so move on the left...
+    }
+
+    // not in a match, so move
+    if (!lhs.next()) return false;
+    // attempt match on the right
+    matchRight();
+    return true;
+  }
+
+
+  /**
+   * Closes all the operands.
+   *
+   * @throws TuplesException If either the lhs or the rhs can't be closed.
+   */
+  public void close() throws TuplesException {
+    lhs.close();
+    rhs.close();
+  }
+
+
+  /**
+   * @return {@inheritDoc}
+   */
+  public Object clone() {
+    LeftJoin cloned = (LeftJoin)super.clone();
+
+    // Copy mutable fields by value
+    cloned.lhs = (Tuples)lhs.clone();
+    cloned.rhs = (Tuples)rhs.clone();
+
+    return cloned;
+  }
+
+
+  //
+  // Internal methods
+  //
+
+  private void matchRight() throws TuplesException {
+    long[] prefix = new long[varMap.length];
+    // copy the variables from the current lhs row into the prefix
+    for (int i = 0; i < varMap.length; i++) prefix[i] = lhs.getColumnValue(varMap[i]);
+    // find the entry in the rhs
+    rhs.beforeFirst(prefix, 0);
+    // mark the RHS has matching if the search found anything
+    rightMatches = rhs.next();
+  }
+
+}

Added: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java	                        (rev 0)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java	2008-04-05 04:50:45 UTC (rev 743)
@@ -0,0 +1,126 @@
+/*
+ * The contents of this file are subject to the Open Software License
+ * Version 3.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.opensource.org/licenses/osl-3.0.txt
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ */
+
+package org.mulgara.store.tuples;
+
+// Java 2 standard packages
+import java.util.Arrays;
+
+// Third party packages
+import org.apache.log4j.Logger;
+
+// Local packages
+import org.mulgara.query.TuplesException;
+
+/**
+ * Order the rows of a tuples in ascending order by node number. Columns with
+ * lower index numbers are major.
+ *
+ * @created 2008-04-04
+ * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @copyright &copy; 2008 <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ */
+public class PartialColumnComparator implements RowComparator {
+
+  /** Logger. */
+  private static final Logger logger = Logger.getLogger(PartialColumnComparator.class);
+  
+  private int[] varMap;
+
+  public PartialColumnComparator(int[] varMap) {
+    this.varMap = varMap;
+  }
+
+  /**
+   * Compares the LHS and RHS parameters as tuples which are to be compared in column order.
+   * @param lhs The left hand side operand.
+   * @param rhs The right hand side operand.
+   * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+   */
+  public int compare(long[] lhs, long[] rhs) {
+    if (lhs.length != rhs.length) throw new IllegalArgumentException("Tuples must be the same length for comparison.");
+    if (lhs.length < varMap.length) throw new IllegalArgumentException("Too many columns to look for in the tuples.");
+
+    // Compare each column for equality
+    for (int i = 0; i < varMap.length; i++) {
+      if (lhs[varMap[i]] < rhs[varMap[i]]) return -1;
+      if (lhs[varMap[i]] > rhs[varMap[i]]) return +1;
+    }
+    // We have a duplicate row
+    return 0; // indicate equality
+  }
+
+  /**
+   * Compares in column order the LHS as a row of a Tuples and a RHS Tuples.
+   * The lhs may be shorter than the rhs, as it can be for a beforeFirst search.
+   * @param lhs An array to be treated as a Tuples row.
+   * @param rhs A Tuples, with the current row to be compared.
+   * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+   * @throws TuplesException If there is an error accessing the Tuples.
+   */
+  public int compare(long[] lhs, Tuples rhs) throws TuplesException {
+
+    if (lhs.length > rhs.getNumberOfVariables()) {
+      // We permit the length of lhs to be less than the number of columns
+      // of rhs to permit the use of this method where lhs is a prefix.
+      throw new IllegalArgumentException("LHS array length (" + lhs.length +
+              ") is greater than RHS (" + rhs.getNumberOfVariables() + ")");
+    }
+
+    // Compare each column for equality
+    try {
+      int searchlength = Math.min(lhs.length, varMap.length);
+      for (int i = 0; i < searchlength; i++) {
+        if (varMap[i] >= lhs.length) throw new IllegalArgumentException("Ordering of column search does not match search parameters");
+        long rhsValue = rhs.getColumnValue(varMap[i]);
+        if (lhs[varMap[i]] < rhsValue) return -1;
+        if (lhs[varMap[i]] > rhsValue) return +1;
+      }
+    } catch (TuplesException e) {
+      logger.error("Couldn't perform row comparison", e);
+      throw e;
+    }
+    // We have a duplicate row
+    return 0; // indicate equality
+  }
+
+  /**
+   * Compares in column order the current rows of two Tuples.
+   * @param lhs A Tuples with the current row to be compared.
+   * @param rhs A Tuples with the current row to be compared.
+   * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+   * @throws TuplesException If there is an error accessing the Tuples.
+   */
+  public int compare(Tuples lhs, Tuples rhs) throws TuplesException {
+
+    // Compare each column for equality
+    int lhsLength = lhs.getNumberOfVariables();
+
+    if (lhsLength != rhs.getNumberOfVariables()) throw new IllegalArgumentException("LHS number of variables (" + lhsLength + ") doesn't match RHS (" + rhs.getNumberOfVariables() + ")");
+
+    if (lhsLength < varMap.length) throw new IllegalArgumentException("Too many columns to look for in the tuples.");
+
+    assert Arrays.equals(lhs.getVariables(), rhs.getVariables()) :
+        "Variables arrays must match, lhs length:" + lhsLength + " vs rhs length: " + rhs.getVariables().length + " lhs value: " + lhs + ", rhs: " + rhs;
+
+    for (int i = 0; i < varMap.length; i++) {
+      long lhsValue = lhs.getColumnValue(varMap[i]);
+      long rhsValue = rhs.getColumnValue(varMap[i]);
+      if (lhsValue < rhsValue) return -1;
+      if (lhsValue > rhsValue) return +1;
+    }
+
+    // We have a duplicate row
+    return 0; // indicate equality
+  }
+}

Modified: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java	2008-04-05 04:49:39 UTC (rev 742)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java	2008-04-05 04:50:45 UTC (rev 743)
@@ -282,7 +282,7 @@
   /**
    * This is approximately a subtraction.  The subtrahend is matched against the minuend in the same
    * way as a conjunction, and the matching lines removed from the minuend.  The remaining lines in
-   * the minuend are the result.
+   * the minuend are the result. Parameters are not closed during this operation.
    * @param minuend The tuples to subtract from.
    * @param subtrahend The tuples to match against the minuend for removal.
    * @return The contents from the minuend, excluding those rows which match against the subtrahend.
@@ -342,6 +342,68 @@
 
 
   /**
+   * Does a left-outer-join between two tuples. Parameters are not closed during this
+   * operation.
+   * @param standard The standard pattern that appears in all results.
+   * @param optional The optional pattern that may or may not be bound in each result
+   * @return A Tuples containing variables from both parameters. All variables from
+   *   <em>standard</em> will be bound at all times. Variables from <em>optional</em>
+   *   may or may not be bound.
+   */
+  public static Tuples optionalJoin(Tuples standard, Tuples optional) throws TuplesException {
+    try {
+
+      if (logger.isDebugEnabled()) {
+        logger.debug("optional join of " + standard + " optional { " + optional + " }");
+      }
+      // get the matching columns
+      Set<Variable> matchingVars = getMatchingVars(standard, optional);
+      // check for empty parameters
+      if (standard.getRowCardinality() == Cursor.ZERO) {
+        logger.debug("Nothing to the left of an optional");
+        return (Tuples)standard.clone();
+      }
+      if (optional.getRowCardinality() == Cursor.ZERO) {
+        // need to return standard, projected out to the extra variables
+        if (optional.getNumberOfVariables() == 0) {
+          logger.warn("Lost variables on empty optional");
+          // TODO: This may be a problem. Throw an exception? A fix may require variables
+          // to be attached to empty tuples
+          return (Tuples)standard.clone();
+        }
+      }
+
+      // check if there are variables which should not be considered when sorting
+      if (!checkForExtraVariables(optional, matchingVars)) {
+        // there were no extra variables in the optional
+        logger.debug("All variables needed");
+        // if all variables match, then this is an inner join
+        return join(standard, optional);
+      }
+
+      // yes, there are extra variables
+      logger.debug("sorting on the common variables");
+      // re-sort the optional according to the matching variables
+      // reorder the optional as necessary
+      Tuples sortedOptional = reSort(optional, new ArrayList<Variable>(matchingVars));
+
+      // return the difference
+      try {
+        return new LeftJoin(standard, sortedOptional);
+      } finally {
+        if (sortedOptional != optional) sortedOptional.close();
+      }
+
+    } catch (RuntimeException re) {
+      logger.warn("RuntimeException thrown in optional", re);
+      throw re;
+    } catch (TuplesException te) {
+      logger.warn("TuplesException thrown in optional", te);
+      throw te;
+    }
+  }
+
+  /**
    * Flattens any nested joins to allow polyadic join operations.
    */
   private static List flattenOperands(List operands) throws TuplesException {
@@ -816,6 +878,61 @@
   }
 
   /**
+   * Sort into an order given by the list of variables
+   *
+   * @param tuples The parameter to sort. This will be closed if a new tuples is created.
+   * @param variableList the list of {@link Variable}s to sort by
+   * @return A {@link Tuples} that meets the sort criteria. This may be the original tuples parameter.
+   * @throws TuplesException if the sort operation fails
+   */
+  public static Tuples reSort(Tuples tuples, List<Variable> variableList) throws TuplesException {
+    try {
+      // if there is nothing to sort on, then tuples meets the criteria
+      if ((variableList == null) || (variableList.size() == 0)) return tuples;
+
+      // if there is nothing to sort, then just return that nothing
+      if (tuples.isUnconstrained()) {
+        if (logger.isDebugEnabled()) logger.debug("Returning Unconstrained Tuples.");
+        return TuplesOperations.unconstrained();
+      } else if (tuples.getRowCardinality() == Cursor.ZERO) {
+        return empty();
+      }
+
+      // initialise the mapping of column names to tuples columns.
+      int[] varMap = new int[variableList.size()];
+
+      boolean sortNeeded = false;
+      // iterate over the variables to do the mapping
+      for (int varCol = 0; varCol < variableList.size(); varCol++) {
+        Variable var = variableList.get(varCol);
+        // get the index of the variable in the tuples
+        int ti = tuples.getColumnIndex(var);
+        if (ti == Tuples.UNBOUND) throw new IllegalArgumentException("Variables to sort by not found in Tuples");
+        // check that it is within the prefix columns. If not, then sorting is needed
+        if (ti >= varMap.length) sortNeeded = true;
+        // map the tuples index of the variable to the column index
+        varMap[varCol++] = ti;
+      }
+
+      if (logger.isDebugEnabled()) logger.debug("No sort needed on tuples.");
+      if (!sortNeeded) return tuples;
+      
+      if (logger.isDebugEnabled()) logger.debug("Sorting on " + variableList);
+
+      // Perform the actual sort
+      Tuples oldTuples = tuples;
+      tuples = tuplesFactory.newTuples(tuples, new PartialColumnComparator(varMap));
+      assert tuples != oldTuples;
+      oldTuples.close();
+
+      return tuples;
+    } catch (TuplesException e) {
+      throw new TuplesException("Couldn't perform projection", e);
+    }
+  }
+
+
+  /**
    * Truncate a tuples to have no more than a specified number of rows. This
    * method removes rows from the end of the tuples; to remove rows from the
    * start of the tuples, the {@link #offset} method can be used. If the limit




More information about the Mulgara-svn mailing list