[Mulgara-svn] r743 - branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples
pag at mulgara.org
pag at mulgara.org
Sat Apr 5 04:50:46 UTC 2008
Author: pag
Date: 2008-04-04 21:50:45 -0700 (Fri, 04 Apr 2008)
New Revision: 743
Added:
branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java
Modified:
branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
Log:
Added method and classes to support OPTIONAL joins from SPARQL
Added: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java (rev 0)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java 2008-04-05 04:50:45 UTC (rev 743)
@@ -0,0 +1,280 @@
+/*
+ * The contents of this file are subject to the Open Software License
+ * Version 3.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.opensource.org/licenses/osl-3.0.txt
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ */
+
+package org.mulgara.store.tuples;
+
+// Java 2 standard packages
+import java.math.BigInteger;
+import java.util.*;
+
+// Third party packages
+import org.apache.log4j.*;
+
+// Locally written packages
+import org.mulgara.query.Constraint;
+import org.mulgara.query.TuplesException;
+import org.mulgara.query.Variable;
+import org.mulgara.store.tuples.AbstractTuples;
+
+/**
+ * Left Join operation.
+ *
+ * This operation is similar to a subtraction, only it returns every row from the LHS,
+ * while returning rows from the RHS when available, and <code>null</code> otherwise.
+ *
+ * The join is performed by iterating over the lhs, and searching on the
+ * RHS for matching rows. For efficient searching, the RHS must be
+ * ordered according to the matching variables. This class is not responsible for
+ * ensuring the sort order of the RHS; that responsibility falls to
+ * {@link TuplesOperations#optionalJoin(Tuples, Tuples)}.
+ *
+ * @created 2008-04-04
+ * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @copyright © 2005 <A href="mailto:pgearon at users.sourceforge.net">Paul Gearon</A>
+ * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ */
+public class LeftJoin extends AbstractTuples {
+
+ @SuppressWarnings("unused")
+ private static Logger logger = Logger.getLogger(LeftJoin.class.getName());
+
+ /** The set of tuples to return all row from. */
+ protected Tuples lhs;
+
+ /** The set of tuples to add to the lhs. */
+ protected Tuples rhs;
+
+ /** The set of variables common to both the lhs and the rhs. */
+ protected Set<Variable> commonVars;
+
+ /** An array of the matching variables' columns within the lhs, indexed by the rhs position. */
+ protected int[] varMap;
+
+ /** Indicates if the RHS is currently sitting on a match. */
+ private boolean rightMatches = false;
+
+ /** A collection of the variables on the LHS */
+ private ArrayList<Variable> lhsVars;
+
+ /** The offset for indexing into the RHS, while avoiding LHS variables */
+ private int rhsOffset;
+
+ /**
+ * Configure a subtraction operation for lazy evaluation.
+ *
+ * @param lhs The original tuples, including the rows to be removed.
+ * @param rhs The tuples defining the rows to be removed from the lhs.
+ * @throws IllegalArgumentException If the <var>lhs</var> and <var>rhs</var>
+ * contain no variables in common.
+ */
+ @SuppressWarnings("unchecked")
+ LeftJoin(Tuples lhs, Tuples rhs) throws TuplesException, IllegalArgumentException {
+ // store the operands
+ this.lhs = (Tuples)lhs.clone();
+ this.rhs = (Tuples)rhs.clone();
+
+ // get the variables to subtract with. TODO: get the correct type back from getMatchingVars
+ commonVars = Collections.unmodifiableSet((Set<Variable>)TuplesOperations.getMatchingVars(lhs, rhs));
+
+ if (commonVars.isEmpty()) logger.warn("Tuples should have variables in common for optional join to occur");
+
+ // initialise the mapping of lhs columns to rhs columns
+ varMap = new int[commonVars.size()];
+ // iterate over the variables to do the mapping
+ for (Variable var: commonVars) {
+ // get the index of the variable in the rhs
+ int si = rhs.getColumnIndex(var);
+ // check that it is within the prefix columns. If not, then the rhs is not properly sorted.
+ if (si >= varMap.length) {
+ String op = "common= " + commonVars.toString();
+ op += "; var= " + var + "; index in sub= " + si +"; rhs= [ ";
+ Variable[] v = rhs.getVariables();
+ for (int k = 0; k < v.length; k++) {
+ op += v[k] + " ";
+ }
+ op += "]";
+ // usually this would be an assertion, but it's too important to miss
+ throw new IllegalArgumentException("Subtracted tuples not sorted correctly: " + op);
+ }
+ // map the rhs index of the variable to the lhs index
+ varMap[si] = lhs.getColumnIndex(var);
+ }
+
+ // set the variables for this optional conjunction
+ lhsVars = new ArrayList<Variable>(Arrays.asList(lhs.getVariables()));
+ ArrayList<Variable> vars = (ArrayList<Variable>)lhsVars.clone();
+ ArrayList<Variable> rhsVars = new ArrayList<Variable>(Arrays.asList(rhs.getVariables()));
+ rhsVars.removeAll(commonVars);
+ vars.addAll(rhsVars);
+ setVariables(vars);
+
+ // set the column offset for indexing into the RHS
+ rhsOffset = lhsVars.size() - varMap.length;
+ assert rhsOffset >= 0;
+ }
+
+
+ //
+ // Methods implementing Tuples
+ //
+
+ /** {@inheritDoc} */
+ public long getColumnValue(int column) throws TuplesException {
+ int nrLeftVars = lhs.getNumberOfVariables();
+ if (column < nrLeftVars) return lhs.getColumnValue(column);
+ // return the column minus the LHS columns, and then skip over the matching vars
+ return rhs.getColumnValue(column - rhsOffset);
+ }
+
+
+ /** {@inheritDoc} */
+ public long getRowUpperBound() throws TuplesException {
+ BigInteger rowCount = BigInteger.valueOf(lhs.getRowUpperBound());
+ rowCount = rowCount.multiply(BigInteger.valueOf(rhs.getRowUpperBound()));
+ return rowCount.longValue();
+ }
+
+
+ /** {@inheritDoc} Relies on the lhs of the optional. */
+ public boolean isColumnEverUnbound(int column) throws TuplesException {
+ int nrLeftVars = lhs.getNumberOfVariables();
+ if (column < nrLeftVars) return lhs.isColumnEverUnbound(column);
+ // return the column minus the LHS columns, and then skip over the matching vars
+ return rhs.isColumnEverUnbound(column - rhsOffset);
+ }
+
+
+ /** {@inheritDoc} */
+ public int getColumnIndex(Variable variable) throws TuplesException {
+ if (lhsVars.contains(variable)) return lhs.getColumnIndex(variable);
+ return rhs.getColumnIndex(variable) + rhsOffset;
+ }
+
+
+ /**
+ * {@inheritDoc}
+ * @return Always <code>false</code>.
+ */
+ public boolean isMaterialized() {
+ return false;
+ }
+
+
+ /** {@inheritDoc} */
+ public boolean hasNoDuplicates() throws TuplesException {
+ return lhs.hasNoDuplicates();
+ }
+
+
+ /** {@inheritDoc} */
+ public RowComparator getComparator() {
+ return lhs.getComparator();
+ }
+
+
+ /** {@inheritDoc} */
+ public List<Tuples> getOperands() {
+ return Collections.unmodifiableList(Arrays.asList(new Tuples[] {lhs, rhs}));
+ }
+
+
+ /** {@inheritDoc} */
+ public boolean isUnconstrained() throws TuplesException {
+ return lhs.isUnconstrained();
+ }
+
+
+ /** {@inheritDoc} */
+ public void renameVariables(Constraint constraint) {
+ lhs.renameVariables(constraint);
+ rhs.renameVariables(constraint);
+ }
+
+
+ /** {@inheritDoc} */
+ public void beforeFirst(long[] prefix, int suffixTruncation) throws TuplesException {
+ int nrVars = lhs.getNumberOfVariables();
+ int tailLen = prefix.length - nrVars;
+ if (tailLen <= 0) {
+ // search on the LHS
+ lhs.beforeFirst(prefix, suffixTruncation);
+ matchRight();
+ } else {
+ // search on the LHS, and do the remainder of the search of the RHS
+ lhs.beforeFirst(prefix, nrVars);
+ matchRight();
+ // if there is data on the right, then search in it
+ if (rightMatches) {
+ long[] tailPrefix = new long[tailLen];
+ for (int i = 0; i < tailLen; i++) tailPrefix[i] = prefix[i + nrVars];
+ rhs.beforeFirst(tailPrefix, 0);
+ }
+ }
+ }
+
+ /** {@inheritDoc} */
+ public boolean next() throws TuplesException {
+ // check if we are currently in a match
+ if (rightMatches) {
+ // if there is more, then return
+ if (rhs.next()) return true;
+ // no more, so move on the left...
+ }
+
+ // not in a match, so move
+ if (!lhs.next()) return false;
+ // attempt match on the right
+ matchRight();
+ return true;
+ }
+
+
+ /**
+ * Closes all the operands.
+ *
+ * @throws TuplesException If either the lhs or the rhs can't be closed.
+ */
+ public void close() throws TuplesException {
+ lhs.close();
+ rhs.close();
+ }
+
+
+ /**
+ * @return {@inheritDoc}
+ */
+ public Object clone() {
+ LeftJoin cloned = (LeftJoin)super.clone();
+
+ // Copy mutable fields by value
+ cloned.lhs = (Tuples)lhs.clone();
+ cloned.rhs = (Tuples)rhs.clone();
+
+ return cloned;
+ }
+
+
+ //
+ // Internal methods
+ //
+
+ private void matchRight() throws TuplesException {
+ long[] prefix = new long[varMap.length];
+ // copy the variables from the current lhs row into the prefix
+ for (int i = 0; i < varMap.length; i++) prefix[i] = lhs.getColumnValue(varMap[i]);
+ // find the entry in the rhs
+ rhs.beforeFirst(prefix, 0);
+ // mark the RHS has matching if the search found anything
+ rightMatches = rhs.next();
+ }
+
+}
Added: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java (rev 0)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/PartialColumnComparator.java 2008-04-05 04:50:45 UTC (rev 743)
@@ -0,0 +1,126 @@
+/*
+ * The contents of this file are subject to the Open Software License
+ * Version 3.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ * http://www.opensource.org/licenses/osl-3.0.txt
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ */
+
+package org.mulgara.store.tuples;
+
+// Java 2 standard packages
+import java.util.Arrays;
+
+// Third party packages
+import org.apache.log4j.Logger;
+
+// Local packages
+import org.mulgara.query.TuplesException;
+
+/**
+ * Order the rows of a tuples in ascending order by node number. Columns with
+ * lower index numbers are major.
+ *
+ * @created 2008-04-04
+ * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @copyright © 2008 <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
+ * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ */
+public class PartialColumnComparator implements RowComparator {
+
+ /** Logger. */
+ private static final Logger logger = Logger.getLogger(PartialColumnComparator.class);
+
+ private int[] varMap;
+
+ public PartialColumnComparator(int[] varMap) {
+ this.varMap = varMap;
+ }
+
+ /**
+ * Compares the LHS and RHS parameters as tuples which are to be compared in column order.
+ * @param lhs The left hand side operand.
+ * @param rhs The right hand side operand.
+ * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+ */
+ public int compare(long[] lhs, long[] rhs) {
+ if (lhs.length != rhs.length) throw new IllegalArgumentException("Tuples must be the same length for comparison.");
+ if (lhs.length < varMap.length) throw new IllegalArgumentException("Too many columns to look for in the tuples.");
+
+ // Compare each column for equality
+ for (int i = 0; i < varMap.length; i++) {
+ if (lhs[varMap[i]] < rhs[varMap[i]]) return -1;
+ if (lhs[varMap[i]] > rhs[varMap[i]]) return +1;
+ }
+ // We have a duplicate row
+ return 0; // indicate equality
+ }
+
+ /**
+ * Compares in column order the LHS as a row of a Tuples and a RHS Tuples.
+ * The lhs may be shorter than the rhs, as it can be for a beforeFirst search.
+ * @param lhs An array to be treated as a Tuples row.
+ * @param rhs A Tuples, with the current row to be compared.
+ * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+ * @throws TuplesException If there is an error accessing the Tuples.
+ */
+ public int compare(long[] lhs, Tuples rhs) throws TuplesException {
+
+ if (lhs.length > rhs.getNumberOfVariables()) {
+ // We permit the length of lhs to be less than the number of columns
+ // of rhs to permit the use of this method where lhs is a prefix.
+ throw new IllegalArgumentException("LHS array length (" + lhs.length +
+ ") is greater than RHS (" + rhs.getNumberOfVariables() + ")");
+ }
+
+ // Compare each column for equality
+ try {
+ int searchlength = Math.min(lhs.length, varMap.length);
+ for (int i = 0; i < searchlength; i++) {
+ if (varMap[i] >= lhs.length) throw new IllegalArgumentException("Ordering of column search does not match search parameters");
+ long rhsValue = rhs.getColumnValue(varMap[i]);
+ if (lhs[varMap[i]] < rhsValue) return -1;
+ if (lhs[varMap[i]] > rhsValue) return +1;
+ }
+ } catch (TuplesException e) {
+ logger.error("Couldn't perform row comparison", e);
+ throw e;
+ }
+ // We have a duplicate row
+ return 0; // indicate equality
+ }
+
+ /**
+ * Compares in column order the current rows of two Tuples.
+ * @param lhs A Tuples with the current row to be compared.
+ * @param rhs A Tuples with the current row to be compared.
+ * @return -1 if lhs<rhs, +1 if lhs>rhs, 0 otherwise.
+ * @throws TuplesException If there is an error accessing the Tuples.
+ */
+ public int compare(Tuples lhs, Tuples rhs) throws TuplesException {
+
+ // Compare each column for equality
+ int lhsLength = lhs.getNumberOfVariables();
+
+ if (lhsLength != rhs.getNumberOfVariables()) throw new IllegalArgumentException("LHS number of variables (" + lhsLength + ") doesn't match RHS (" + rhs.getNumberOfVariables() + ")");
+
+ if (lhsLength < varMap.length) throw new IllegalArgumentException("Too many columns to look for in the tuples.");
+
+ assert Arrays.equals(lhs.getVariables(), rhs.getVariables()) :
+ "Variables arrays must match, lhs length:" + lhsLength + " vs rhs length: " + rhs.getVariables().length + " lhs value: " + lhs + ", rhs: " + rhs;
+
+ for (int i = 0; i < varMap.length; i++) {
+ long lhsValue = lhs.getColumnValue(varMap[i]);
+ long rhsValue = rhs.getColumnValue(varMap[i]);
+ if (lhsValue < rhsValue) return -1;
+ if (lhsValue > rhsValue) return +1;
+ }
+
+ // We have a duplicate row
+ return 0; // indicate equality
+ }
+}
Modified: branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
===================================================================
--- branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java 2008-04-05 04:49:39 UTC (rev 742)
+++ branches/mgr-61-sparql/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java 2008-04-05 04:50:45 UTC (rev 743)
@@ -282,7 +282,7 @@
/**
* This is approximately a subtraction. The subtrahend is matched against the minuend in the same
* way as a conjunction, and the matching lines removed from the minuend. The remaining lines in
- * the minuend are the result.
+ * the minuend are the result. Parameters are not closed during this operation.
* @param minuend The tuples to subtract from.
* @param subtrahend The tuples to match against the minuend for removal.
* @return The contents from the minuend, excluding those rows which match against the subtrahend.
@@ -342,6 +342,68 @@
/**
+ * Does a left-outer-join between two tuples. Parameters are not closed during this
+ * operation.
+ * @param standard The standard pattern that appears in all results.
+ * @param optional The optional pattern that may or may not be bound in each result
+ * @return A Tuples containing variables from both parameters. All variables from
+ * <em>standard</em> will be bound at all times. Variables from <em>optional</em>
+ * may or may not be bound.
+ */
+ public static Tuples optionalJoin(Tuples standard, Tuples optional) throws TuplesException {
+ try {
+
+ if (logger.isDebugEnabled()) {
+ logger.debug("optional join of " + standard + " optional { " + optional + " }");
+ }
+ // get the matching columns
+ Set<Variable> matchingVars = getMatchingVars(standard, optional);
+ // check for empty parameters
+ if (standard.getRowCardinality() == Cursor.ZERO) {
+ logger.debug("Nothing to the left of an optional");
+ return (Tuples)standard.clone();
+ }
+ if (optional.getRowCardinality() == Cursor.ZERO) {
+ // need to return standard, projected out to the extra variables
+ if (optional.getNumberOfVariables() == 0) {
+ logger.warn("Lost variables on empty optional");
+ // TODO: This may be a problem. Throw an exception? A fix may require variables
+ // to be attached to empty tuples
+ return (Tuples)standard.clone();
+ }
+ }
+
+ // check if there are variables which should not be considered when sorting
+ if (!checkForExtraVariables(optional, matchingVars)) {
+ // there were no extra variables in the optional
+ logger.debug("All variables needed");
+ // if all variables match, then this is an inner join
+ return join(standard, optional);
+ }
+
+ // yes, there are extra variables
+ logger.debug("sorting on the common variables");
+ // re-sort the optional according to the matching variables
+ // reorder the optional as necessary
+ Tuples sortedOptional = reSort(optional, new ArrayList<Variable>(matchingVars));
+
+ // return the difference
+ try {
+ return new LeftJoin(standard, sortedOptional);
+ } finally {
+ if (sortedOptional != optional) sortedOptional.close();
+ }
+
+ } catch (RuntimeException re) {
+ logger.warn("RuntimeException thrown in optional", re);
+ throw re;
+ } catch (TuplesException te) {
+ logger.warn("TuplesException thrown in optional", te);
+ throw te;
+ }
+ }
+
+ /**
* Flattens any nested joins to allow polyadic join operations.
*/
private static List flattenOperands(List operands) throws TuplesException {
@@ -816,6 +878,61 @@
}
/**
+ * Sort into an order given by the list of variables
+ *
+ * @param tuples The parameter to sort. This will be closed if a new tuples is created.
+ * @param variableList the list of {@link Variable}s to sort by
+ * @return A {@link Tuples} that meets the sort criteria. This may be the original tuples parameter.
+ * @throws TuplesException if the sort operation fails
+ */
+ public static Tuples reSort(Tuples tuples, List<Variable> variableList) throws TuplesException {
+ try {
+ // if there is nothing to sort on, then tuples meets the criteria
+ if ((variableList == null) || (variableList.size() == 0)) return tuples;
+
+ // if there is nothing to sort, then just return that nothing
+ if (tuples.isUnconstrained()) {
+ if (logger.isDebugEnabled()) logger.debug("Returning Unconstrained Tuples.");
+ return TuplesOperations.unconstrained();
+ } else if (tuples.getRowCardinality() == Cursor.ZERO) {
+ return empty();
+ }
+
+ // initialise the mapping of column names to tuples columns.
+ int[] varMap = new int[variableList.size()];
+
+ boolean sortNeeded = false;
+ // iterate over the variables to do the mapping
+ for (int varCol = 0; varCol < variableList.size(); varCol++) {
+ Variable var = variableList.get(varCol);
+ // get the index of the variable in the tuples
+ int ti = tuples.getColumnIndex(var);
+ if (ti == Tuples.UNBOUND) throw new IllegalArgumentException("Variables to sort by not found in Tuples");
+ // check that it is within the prefix columns. If not, then sorting is needed
+ if (ti >= varMap.length) sortNeeded = true;
+ // map the tuples index of the variable to the column index
+ varMap[varCol++] = ti;
+ }
+
+ if (logger.isDebugEnabled()) logger.debug("No sort needed on tuples.");
+ if (!sortNeeded) return tuples;
+
+ if (logger.isDebugEnabled()) logger.debug("Sorting on " + variableList);
+
+ // Perform the actual sort
+ Tuples oldTuples = tuples;
+ tuples = tuplesFactory.newTuples(tuples, new PartialColumnComparator(varMap));
+ assert tuples != oldTuples;
+ oldTuples.close();
+
+ return tuples;
+ } catch (TuplesException e) {
+ throw new TuplesException("Couldn't perform projection", e);
+ }
+ }
+
+
+ /**
* Truncate a tuples to have no more than a specified number of rows. This
* method removes rows from the end of the tuples; to remove rows from the
* start of the tuples, the {@link #offset} method can be used. If the limit
More information about the Mulgara-svn
mailing list