[Mulgara-svn] r1754 - trunk/src/jar/tuples/java/org/mulgara/store/tuples
pag at mulgara.org
pag at mulgara.org
Fri Jul 10 16:25:26 UTC 2009
Author: pag
Date: 2009-07-10 09:25:25 -0700 (Fri, 10 Jul 2009)
New Revision: 1754
Modified:
trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java
trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java
Log:
Added the getRowExpectedCount method to supplant getRowUpperBound, plus cleaned up warnings by removing unused imports and adding generics
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -295,6 +295,8 @@
public abstract long getRowUpperBound() throws TuplesException;
+ public abstract long getRowExpectedCount() throws TuplesException;
+
public int getRowCardinality() throws TuplesException {
if (rowCardinality != -1) return rowCardinality;
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,8 +27,8 @@
package org.mulgara.store.tuples;
+import java.util.Collections;
import java.util.List;
-import java.util.ArrayList;
// Third party packages
import org.apache.log4j.*;
@@ -61,11 +61,9 @@
*/
class Assignment extends AbstractTuples {
- /**
- * Logger.
- */
- private static Logger logger =
- Logger.getLogger(Assignment.class.getName());
+ /** Logger. */
+ @SuppressWarnings("unused")
+ private static Logger logger = Logger.getLogger(Assignment.class.getName());
/**
* The value the variable is assigned.
@@ -251,6 +249,10 @@
return getRowCount();
}
+ public long getRowExpectedCount() {
+ return getRowCount();
+ }
+
/**
* METHOD TO DO
*
@@ -279,8 +281,8 @@
return true;
}
- public List getOperands() {
- return new ArrayList(0);
+ public List<Tuples> getOperands() {
+ return Collections.emptyList();
}
public RowComparator getRowComparator() {
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -1,13 +1,16 @@
/*
- * The contents of this file are subject to the Open Software License
- * Version 3.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.opensource.org/licenses/osl-3.0.txt
+ * Copyright 2008 Fedora Commons
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
package org.mulgara.store.tuples;
@@ -37,10 +40,9 @@
* ensuring the sort order of the subtrahend; that responsibility falls to
* {@link TuplesOperations#subtract}.
*
- * @created 2005-03
- * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
- * @copyright © 2008 <A href="http://www.topazproject.org/">The Topaz Project</A>
- * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ * @created March, 2005
+ * @author Paul Gearon
+ * @licence <a href="{@docRoot}/../LICENCE.txt">Apache License, Version 2.0</a>
*/
public class Difference extends AbstractTuples {
@@ -67,7 +69,6 @@
* @throws IllegalArgumentException If the <var>minuend</var> and <var>subtrahend</var>
* contain no variables in common.
*/
- @SuppressWarnings("unchecked")
Difference(Tuples minuend, Tuples subtrahend) throws TuplesException, IllegalArgumentException {
// store the operands
this.minuend = (Tuples)minuend.clone();
@@ -128,8 +129,31 @@
return minuend.getRowUpperBound();
}
+ /**
+ * This is a factor that we can expect the subtrahend to match on the minuend.
+ * 1.0 indicates that the subtrahend is a subset of the minuend. 0.0 indicates
+ * there is no match at all.
+ * TODO: update this value statistically, rather than using a constant value.
+ */
+ private static final double MATCH_RATIO = 0.25;
/**
+ * @return {@inheritDoc} This is estimated as the size of the minuend,
+ * though it will probably be smaller.
+ * @throws TuplesException {@inheritDoc}
+ */
+ public long getRowExpectedCount() throws TuplesException {
+ long minCount = minuend.getRowExpectedCount();
+ long subCount = subtrahend.getRowExpectedCount();
+ long guess = minCount - (long)(MATCH_RATIO * subCount);
+ // if the guess is large enough (by some fudge factor), then we'll use it
+ if (guess > minCount / 2) return guess;
+
+ return (long)(minCount * MATCH_RATIO);
+ }
+
+
+ /**
* {@inheritDoc} Relies on the minuend of the difference.
*/
public boolean isColumnEverUnbound(int column) throws TuplesException {
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -143,7 +143,6 @@
* @return The Comparator value
*/
public RowComparator getComparator() {
-
return operand.getComparator();
}
@@ -151,6 +150,10 @@
return operand.getRowUpperBound();
}
+ public long getRowExpectedCount() throws TuplesException {
+ return operand.getRowExpectedCount();
+ }
+
/**
* Gets the Variables attribute of the DistinctTuples object
*
@@ -187,7 +190,7 @@
return operand.isUnconstrained();
}
- public List getOperands() {
+ public List<Tuples> getOperands() {
return Collections.singletonList(operand);
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,7 +27,7 @@
package org.mulgara.store.tuples;
-import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
// Locally written packages
@@ -131,6 +131,10 @@
return 0;
}
+ public long getRowExpectedCount() {
+ return 0;
+ }
+
/**
* Empty has no rows that can be unbound.
*
@@ -155,8 +159,8 @@
return true;
}
- public List getOperands() {
- return new ArrayList(0);
+ public List<Tuples> getOperands() {
+ return Collections.emptyList();
}
/**
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -94,6 +94,12 @@
/** {@inheritDoc} */
+ public long getRowExpectedCount() throws TuplesException {
+ return (long)(unfiltered.getRowExpectedCount() * getMatchRatio());
+ }
+
+
+ /** {@inheritDoc} */
public boolean isColumnEverUnbound(int column) throws TuplesException {
return unfiltered.isColumnEverUnbound(column);
}
@@ -248,4 +254,12 @@
contextListeners.add(l);
}
+ /**
+ * The expected ratio for matching on the filter. This value should update over time.
+ * TODO: calculate this value, update it over time, and record it against the filter pattern.
+ * @return A value between 1.0 (100% match) and 0.0 (no match).
+ */
+ private double getMatchRatio() {
+ return 0.5;
+ }
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -181,6 +181,13 @@
}
+ /** {@inheritDoc} */
+ public long getRowExpectedCount() throws TuplesException {
+ // TODO: work out a better expected value. Maybe add about 10%
+ return lhs.getRowExpectedCount();
+ }
+
+
/** {@inheritDoc} Relies on the lhs of the optional. */
public boolean isColumnEverUnbound(int column) throws TuplesException {
int nrLeftVars = lhs.getNumberOfVariables();
@@ -709,5 +716,10 @@
return wrapped.getRowUpperBound();
}
+ /** @see org.mulgara.query.Cursor#getRowExpectedCount() */
+ public long getRowExpectedCount() throws TuplesException {
+ return wrapped.getRowExpectedCount();
+ }
+
}
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -133,6 +133,12 @@
/** {@inheritDoc} */
+ public long getRowExpectedCount() throws TuplesException {
+ return innerTuples.getRowExpectedCount();
+ }
+
+
+ /** {@inheritDoc} */
public boolean isColumnEverUnbound(int column) throws TuplesException {
return column == lastCol || innerTuples.isColumnEverUnbound(column);
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -55,9 +55,8 @@
*/
class LimitedTuples extends WrappedTuples {
- /**
- * Logger.
- */
+ /** Logger. */
+ @SuppressWarnings("unused")
private final static Logger logger = Logger.getLogger(LimitedTuples.class);
/**
@@ -103,14 +102,23 @@
* Gets the RowCount attribute of the WrappedTuples object
*
* @return The RowCount value
- * @throws TuplesException EXCEPTION TO DO
+ * @throws TuplesException Error accessing underlying tuples.
*/
public long getRowCount() throws TuplesException {
-
long rowCount = tuples.getRowCount();
return (rowCount < limit) ? rowCount : limit;
}
+ public long getRowUpperBound() throws TuplesException {
+ long rowCount = tuples.getRowUpperBound();
+ return (rowCount < limit) ? rowCount : limit;
+ }
+
+ public long getRowExpectedCount() throws TuplesException {
+ long rowCount = tuples.getRowExpectedCount();
+ return (rowCount < limit) ? rowCount : limit;
+ }
+
//
// Methods implementing Tuples
//
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -175,6 +175,10 @@
return getRowCount();
}
+ public long getRowExpectedCount() throws TuplesException {
+ return getRowCount();
+ }
+
public boolean isColumnEverUnbound(int column) throws TuplesException {
return columnContainsUnbound[column];
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -201,6 +201,17 @@
return bound;
}
+ public long getRowExpectedCount() throws TuplesException {
+ long bound = 0;
+
+ for (int i = 0; i < operands.length; i++) {
+ bound += operands[i].getRowExpectedCount();
+ if (bound < 0) return Long.MAX_VALUE;
+ }
+
+ return bound;
+ }
+
/**
* A column is unbound if it's unbound in any of the operands.
*/
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -59,9 +59,8 @@
*/
public class OrderedAppendUnitTest extends TestCase {
- /**
- * Logger.
- */
+ /** Logger. */
+ @SuppressWarnings("unused")
private Logger logger = Logger.getLogger(OrderedAppendUnitTest.class.getName());
/**
@@ -243,7 +242,6 @@
Variable subject = StatementStore.VARIABLES[0];
Variable predicate = StatementStore.VARIABLES[1];
- Variable object = StatementStore.VARIABLES[2];
Tuples lhs = new TestTuples(subject, 1).and(predicate, 8).or(subject, 3);
@@ -271,7 +269,7 @@
newTuples.renameVariables(newConstraint);
- assertEquals(eol + "{$x $y (unevaluated, 4 rows max)" + eol +
+ assertEquals(eol + "{$x $y (unevaluated, 4 rows max, 4 rows expected)" + eol +
"[000001 000008 ]" + eol +
"[000002 000006 ]" + eol +
"[000003 000000 ]" + eol +
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -90,18 +90,15 @@
*
* @param operand the tuples to project
* @param variables the columns to retain
- * @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
- * @throws TuplesException EXCEPTION TO DO
+ * @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
+ * @throws TuplesException Error while accessing the operand data.
*/
- OrderedProjection(Tuples operand, Collection variables)
+ OrderedProjection(Tuples operand, Collection<Variable> variables)
throws TuplesException {
// Validate "operand" parameter
- if (operand == null) {
+ if (operand == null) throw new IllegalArgumentException("Null \"operand\" parameter");
- throw new IllegalArgumentException("Null \"operand\" parameter");
- }
-
// Validate "variables" parameter
if (variables == null) {
@@ -110,28 +107,24 @@
// Determine which columns haven't been projected away
Variable[] operandVariables = operand.getVariables();
- List columnMappingList = new ArrayList(operandVariables.length);
- List variableList = new ArrayList(operandVariables.length);
+ List<Integer> columnMappingList = new ArrayList<Integer>(operandVariables.length);
+ List<Variable> variableList = new ArrayList<Variable>(operandVariables.length);
for (int i = 0; i < operandVariables.length; i++) {
-
if (variables.contains(operandVariables[i])) {
-
variableList.add(operandVariables[i]);
columnMappingList.add(new Integer(i));
}
}
// Initialize fields
- this.operand = (Tuples) operand.clone();
- this.variables =
- (Variable[]) variableList.toArray(new Variable[variableList.size()]);
+ this.operand = (Tuples)operand.clone();
+ this.variables = variableList.toArray(new Variable[variableList.size()]);
// Create column mapping
columnMapping = new int[columnMappingList.size()];
for (int i = 0; i < columnMapping.length; i++) {
-
columnMapping[i] = ( (Integer) columnMappingList.get(i)).intValue();
}
@@ -190,7 +183,7 @@
* Gets the RowCount attribute of the OrderedProjection object
*
* @return The RowCount value
- * @throws TuplesException EXCEPTION TO DO
+ * @throws TuplesException Error on underlying data
*/
public long getRowCount() throws TuplesException {
return operand.getRowCount();
@@ -200,6 +193,10 @@
return operand.getRowUpperBound();
}
+ public long getRowExpectedCount() throws TuplesException {
+ return operand.getRowExpectedCount();
+ }
+
/**
* Gets the Variables attribute of the OrderedProjection object
*
@@ -241,7 +238,7 @@
}
- public List getOperands() {
+ public List<Tuples> getOperands() {
return Collections.singletonList(operand);
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -107,11 +107,12 @@
buffer.append("(");
buffer.append(cloned.getRowCount());
buffer.append(" rows)" + eol);
- }
- else {
+ } else {
buffer.append("(unevaluated, ");
buffer.append(cloned.getRowUpperBound());
- buffer.append(" rows max)" + eol);
+ buffer.append(" rows max, ");
+ buffer.append(cloned.getRowExpectedCount());
+ buffer.append(" rows expected)").append(eol);
}
// Display rows
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -69,22 +69,22 @@
/**
* Description of the Field
*/
- private final List termList;
+ private final List<Map<Variable,Long>> termList;
/**
* Description of the Field
*/
- private final List variableList;
+ private final List<Variable> variableList;
/**
* Description of the Field
*/
- private Map rowMap = null;
+ private Map<Variable,Long> rowMap = null;
/**
* Description of the Field
*/
- private Map lastMap = new HashMap();
+ private Map<Variable,Long> lastMap = new HashMap<Variable,Long>();
/**
* The number of columns in common for sort order.
@@ -98,8 +98,8 @@
*/
public TestTuples() {
- termList = new ArrayList();
- variableList = new ArrayList();
+ termList = new ArrayList<Map<Variable,Long>>();
+ variableList = new ArrayList<Variable>();
}
/**
@@ -196,11 +196,11 @@
return true;
}
- public List getOperands() {
+ public List<Tuples> getOperands() {
if (tuples != null) {
return Collections.singletonList(tuples);
} else {
- return new ArrayList(0);
+ return Collections.emptyList();
}
}
@@ -234,19 +234,20 @@
* @return The RowCount value
*/
public long getRowCount() {
-
return termList.size();
}
public long getRowUpperBound() {
+ return getRowCount();
+ }
+ public long getRowExpectedCount() {
return getRowCount();
}
/**
* This method isn't really implemented; it just assumes all columns might be
* unbound.
- *
* @return <code>true</code>
*/
public boolean isColumnEverUnbound(int column) throws TuplesException {
@@ -393,7 +394,7 @@
*/
private TestTuples or() {
- lastMap = new HashMap();
+ lastMap = new HashMap<Variable,Long>();
termList.add(lastMap);
return this;
@@ -410,14 +411,11 @@
assert row <= termList.size();
if (row == termList.size()) {
-
rowMap = null;
return false;
- }
- else {
-
+ } else {
assert row < termList.size();
- rowMap = (Map) termList.get(row);
+ rowMap = (Map<Variable,Long>) termList.get(row);
row++;
return true;
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -665,9 +665,9 @@
double weightedRowCount = 0.0;
for (int weight = 0; weight < numLeftBindings + 1; weight++) {
double term = vars.length > 0
- ? Math.pow(tuples.getRowUpperBound(), (double)(vars.length - (numLeftBindings - weight)) / vars.length)
- : tuples.getRowUpperBound();
- weightedRowCount += term / Math.pow(100.0, weight);
+ ? Math.pow(tuples.getRowExpectedCount(), (double)(vars.length - (numLeftBindings - weight)) / vars.length)
+ : tuples.getRowExpectedCount();
+ weightedRowCount += term / Math.pow(10.0, weight);
}
if (logger.isDebugEnabled()) {
@@ -1018,7 +1018,9 @@
else buff.append("=");
try {
- buff.append(tuples.getRowUpperBound()).append("]");
+ buff.append(tuples.getRowUpperBound());
+ buff.append(" (~").append(tuples.getRowExpectedCount());
+ buff.append(")]");
} catch (TuplesException et) {
buff.append(et.toString()).append("]");
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -30,6 +30,7 @@
package org.mulgara.store.tuples;
// Java 2 standard packages
+import java.math.BigInteger;
import java.util.*;
// Third party packages
@@ -161,6 +162,11 @@
protected long[] prefix = null;
/**
+ * The variable groups formed in this operation. If more than one there will be a cartesian product in the result.
+ */
+ protected List<VarGroup> varGroups = null;
+
+ /**
* Conjoin a list of propositions.
*
* @param operands the propositions to conjoin; the order affects efficiency,
@@ -185,7 +191,7 @@
if (logger.isDebugEnabled()) {
logger.debug("Operands " + i + " : " + operands[i]);
logger.debug("Operands variables " + i + " : " + Arrays.asList(operands[i].getVariables()));
- logger.debug("Ooperands types " + i + " : " + operands[i].getClass());
+ logger.debug("Operands types " + i + " : " + operands[i].getClass());
}
operandBinding[i] = new long[operands[i].getVariables().length];
if (!operands[i].hasNoDuplicates()) {
@@ -268,6 +274,8 @@
}
}
}
+
+ buildVarGroups();
}
/**
@@ -349,29 +357,52 @@
* @throws TuplesException {@inheritDoc}
*/
public long getRowUpperBound() throws TuplesException {
- if (operands.length == 0) {
- return 0;
+ if (operands.length == 0) return 0;
+ if (operands.length == 1) return operands[0].getRowUpperBound();
+
+ BigInteger rowCount = BigInteger.valueOf(operands[0].getRowUpperBound());
+
+ for (int i = 1; i < operands.length; i++) {
+ rowCount = rowCount.multiply(BigInteger.valueOf(operands[i].getRowUpperBound()));
+ if (rowCount.bitLength() > 63)
+ return Long.MAX_VALUE;
}
- if (operands.length == 1) {
- return operands[0].getRowUpperBound();
- }
- /*
- BigInteger rowCount = BigInteger.valueOf(operands[0].getRowUpperBound());
-
+ return rowCount.longValue();
+ }
+
+ /**
+ * @return {@inheritDoc} This is estimated as the size of the minumum
+ * of the row counts of all the {@link #operands}.
+ * @throws TuplesException {@inheritDoc}
+ */
+ public long getRowExpectedCount() throws TuplesException {
+ if (operands.length == 0) return 0;
+ if (operands.length == 1) return operands[0].getRowExpectedCount();
+
+ // simple joined group. Get the minimum as a guess.
+ if (varGroups.size() == 1) {
+ long result = operands[0].getRowExpectedCount();
for (int i = 1; i < operands.length; i++) {
- rowCount = rowCount.multiply(BigInteger.valueOf(operands[i].getRowUpperBound()));
- if (rowCount.bitLength() > 63)
- return Long.MAX_VALUE;
+ result = Math.min(result, operands[i].getRowExpectedCount());
}
-
+ return result;
+ } else {
+ // cartesian product. Get the simple joins, and multiply.
+ BigInteger rowCount = null;
+ for (VarGroup vg: varGroups) {
+ // calculate the size of this group
+ List<Integer> ops = vg.getOps();
+ long groupResult = operands[ops.get(0)].getRowExpectedCount();
+ for (int i = 1; i < ops.size(); i++) {
+ groupResult = Math.min(groupResult, operands[ops.get(i)].getRowExpectedCount());
+ }
+ // merge the current group into the running total
+ if (rowCount == null) rowCount = BigInteger.valueOf(groupResult);
+ else rowCount = rowCount.multiply(BigInteger.valueOf(groupResult));
+ }
return rowCount.longValue();
- */
- long result = operands[0].getRowUpperBound();
- for (int i = 1; i < operands.length; i++) {
- result = Math.min(result, operands[i].getRowUpperBound());
}
- return result;
}
public boolean isColumnEverUnbound(int column) throws TuplesException {
@@ -459,6 +490,15 @@
return cloned;
}
+ /**
+ * Get the number of groups in this join, based on their shared variables.
+ * This indicates the number of cartesian products required.
+ * @return The number of variable groups discovered between the operands
+ */
+ int getNrGroups() {
+ return varGroups.size();
+ }
+
//
// Internal methods
//
@@ -553,4 +593,122 @@
return true;
}
}
+
+
+ /**
+ * Creates the groupings of variables formed during this join.
+ * An inner join will form if there is just one group.
+ */
+ void buildVarGroups() {
+ varGroups = new LinkedList<VarGroup>();
+
+ // go over all the operands
+ G: for (int i = 0; i < operands.length; i++) {
+ Variable[] vars = operands[i].getVariables();
+ // test if any group already matches this operand
+ for (VarGroup v: varGroups) {
+ if (v.joinsTo(vars)) {
+ // found a match, so add it
+ v.addOperand(i);
+ // this may join in other groups, so test
+ Iterator<VarGroup> vgi = varGroups.iterator();
+ while (vgi.hasNext()) {
+ VarGroup ov = vgi.next();
+ // don't test if this group joins to itself
+ if (ov == v) continue;
+ if (v.joinsTo(ov)) {
+ // groups join, so merge them
+ v.merge(ov);
+ vgi.remove();
+ }
+ }
+ // we've matched this operand in, so move to the next operand
+ continue G;
+ }
+ }
+ // no matches, so create a new group
+ varGroups.add(new VarGroup(i));
+ }
+ }
+
+
+ /**
+ * A class to record a group of variables and the operands they are associated with.
+ */
+ class VarGroup {
+ /** The variables for the group */
+ HashSet<Variable> variables = new HashSet<Variable>();
+
+ /** The operands this group's variables can be found in */
+ ArrayList<Integer> opList = new ArrayList<Integer>();
+
+ /**
+ * Create a group, starting with a given operand.
+ * @param opIndex The index of the operand to seed the group with.
+ */
+ public VarGroup(int opIndex) {
+ addOperand(opIndex);
+ }
+
+
+ /**
+ * Adds a new operand's variables to the group.
+ * @param i The index of the operand to add.
+ */
+ public void addOperand(int i) {
+ assert !opList.contains(operands[i]);
+ opList.add(i);
+ for (Variable v: operands[i].getVariables()) {
+ variables.add(v);
+ }
+ }
+
+
+ /**
+ * Adds another group to this one, based on shared variables.
+ * @param v The other variable group to merge.
+ */
+ @SuppressWarnings("unchecked")
+ public void merge(VarGroup v) {
+ // check that some variables are shared
+ assert ((HashSet<Variable>)variables.clone()).removeAll(v.variables);
+ // check that no operands are shared
+ assert !((ArrayList<Integer>)opList.clone()).removeAll(v.opList);
+ variables.addAll(v.variables);
+ opList.addAll(v.opList);
+ }
+
+
+ /**
+ * Tests if this group joins to a given set of variables.
+ * @param vars An array of variables to test.
+ * @return <code>true</code> if the variables join to this group.
+ */
+ public boolean joinsTo(Variable[] vars) {
+ for (Variable v: vars) {
+ if (variables.contains(v)) return true;
+ }
+ return false;
+ }
+
+
+ /**
+ * Tests if this group joins to another group.
+ * @param og The other group.
+ * @return <code>true</code> if the groups share variables.
+ */
+ @SuppressWarnings("unchecked")
+ public boolean joinsTo(VarGroup og) {
+ return ((HashSet<Variable>)variables.clone()).removeAll(og.variables);
+ }
+
+
+ /**
+ * Get the list of operands for this group.
+ * @return The operand list.
+ */
+ public List<Integer> getOps() {
+ return opList;
+ }
+ }
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -32,7 +32,6 @@
import org.apache.log4j.Logger; // Log4J
// Locally written packages
-import org.mulgara.query.TuplesException;
import org.mulgara.query.Variable;
/**
@@ -171,6 +170,7 @@
Tuples joinedTuples = new UnboundJoin(new Tuples[] {});
assertTrue(joinedTuples.isUnconstrained());
+ assertEquals(0, ((UnboundJoin)joinedTuples).getNrGroups());
}
/**
@@ -183,12 +183,11 @@
Tuples operand =
TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
- .or(x, 3).and(y,
- 4));
+ .or(x, 3).and(y, 4));
assert operand != null;
- Tuples joinedTuples = new UnboundJoin(new Tuples[] {
- operand});
+ Tuples joinedTuples = new UnboundJoin(new Tuples[] {operand});
+ assertEquals(1, ((UnboundJoin)joinedTuples).getNrGroups());
joinedTuples.beforeFirst();
@@ -210,16 +209,14 @@
Tuples lhs =
TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
- .or(x, 3).and(y,
- 4));
+ .or(x, 3).and(y, 4));
Tuples rhs =
TuplesFactory.newInstance().newTuples(new TestTuples(z, 5).and(w, 6)
- .or(z, 7).and(w,
- 8));
+ .or(z, 7).and(w, 8));
- Tuples joined = new UnboundJoin(new Tuples[] {
- lhs, rhs});
+ Tuples joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+ assertEquals(2, ((UnboundJoin)joined).getNrGroups());
Variable[] variables = joined.getVariables();
@@ -249,21 +246,18 @@
Tuples lhs =
TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
- .or(x, 3).and(y,
- 4));
+ .or(x, 3).and(y, 4));
Tuples mhs =
TuplesFactory.newInstance().newTuples(new TestTuples(z, 5).and(w, 6)
- .or(z, 7).and(w,
- 8));
+ .or(z, 7).and(w, 8));
Tuples rhs =
TuplesFactory.newInstance().newTuples(new TestTuples(u, 9).and(v, 10)
- .or(u, 11).and(v,
- 12));
+ .or(u, 11).and(v, 12));
- Tuples joined = new UnboundJoin(new Tuples[] {
- lhs, mhs, rhs});
+ Tuples joined = new UnboundJoin(new Tuples[] {lhs, mhs, rhs});
+ assertEquals(3, ((UnboundJoin)joined).getNrGroups());
Variable[] variables = joined.getVariables();
assertEquals(6, variables.length);
@@ -317,11 +311,11 @@
rhs.appendTuple(new long[] {4, 6});
TuplesFactory.newInstance().newTuples(new TestTuples(y, 2).and(z, 5)
- .or(y, 4).and(z,
- 6));
+ .or(y, 4).and(z, 6));
- Tuples actual = TuplesOperations.sort(new UnboundJoin(new Tuples[] {lhs,
- rhs}));
+ UnboundJoin joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+ assertEquals(1, joined.getNrGroups());
+ Tuples actual = TuplesOperations.sort(joined);
// First, try a straightforward iteration through all rows
actual.beforeFirst();
@@ -606,7 +600,9 @@
LiteralTuples lhs = LiteralTuples.create(lvars, lhsValues);
LiteralTuples rhs = LiteralTuples.create(rvars, rhsValues);
- Tuples actual = TuplesOperations.sort(new UnboundJoin(new Tuples[] {lhs, rhs}));
+ UnboundJoin joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+ assertEquals(1, joined.getNrGroups());
+ Tuples actual = TuplesOperations.sort(joined);
logger.warn("testPartialMGR36 result = " + actual);
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,8 +27,8 @@
package org.mulgara.store.tuples;
+import java.util.Collections;
import java.util.List;
-import java.util.ArrayList;
// Locally written packages
import org.mulgara.query.TuplesException;
@@ -134,6 +134,10 @@
return getRowCount();
}
+ public long getRowExpectedCount() {
+ return getRowCount();
+ }
+
/**
* Unconstrained has no columns that can be unbound.
*
@@ -157,8 +161,8 @@
return true;
}
- public List getOperands() {
- return new ArrayList(0);
+ public List<Tuples> getOperands() {
+ return Collections.emptyList();
}
/**
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -61,9 +61,8 @@
*/
class UnorderedAppend extends AbstractTuples {
- /**
- * Logger.
- */
+ /** Logger. */
+ @SuppressWarnings("unused")
private final static Logger logger = Logger.getLogger(UnorderedAppend.class);
/**
@@ -164,8 +163,7 @@
for (int i = 0; i < operands.length; i++) {
rowCount += operands[i].getRowCount();
- if (rowCount < 0)
- return Long.MAX_VALUE;
+ if (rowCount < 0) return Long.MAX_VALUE;
}
return rowCount;
@@ -176,15 +174,24 @@
for (int i = 0; i < operands.length; i++) {
bound += operands[i].getRowUpperBound();
- if (bound < 0)
- return Long.MAX_VALUE;
+ if (bound < 0) return Long.MAX_VALUE;
}
return bound;
}
- public boolean isColumnEverUnbound(int column) throws TuplesException
- {
+ public long getRowExpectedCount() throws TuplesException {
+ long bound = 0;
+
+ for (int i = 0; i < operands.length; i++) {
+ bound += operands[i].getRowExpectedCount();
+ if (bound < 0) return Long.MAX_VALUE;
+ }
+
+ return bound;
+ }
+
+ public boolean isColumnEverUnbound(int column) throws TuplesException {
assert(operand >= 0) || (operand < operands.length):"No column " +
column;
@@ -219,7 +226,7 @@
}
- public List getOperands() {
+ public List<Tuples> getOperands() {
return Arrays.asList(operands);
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -94,7 +94,7 @@
* @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
* @throws TuplesException EXCEPTION TO DO
*/
- UnorderedProjection(Tuples operand, List columnList) throws TuplesException {
+ UnorderedProjection(Tuples operand, List<Variable> columnList) throws TuplesException {
// Validate "operand" parameter
if (operand == null) {
@@ -111,7 +111,7 @@
}
// Initialize fields
- this.operand = (Tuples) operand.clone();
+ this.operand = (Tuples)operand.clone();
setVariables(columnList);
// Create column mapping
@@ -181,10 +181,9 @@
* Gets the RowCount attribute of the UnorderedProjection object
*
* @return The RowCount value
- * @throws TuplesException EXCEPTION TO DO
+ * @throws TuplesException Error accessing underlying data
*/
public long getRowCount() throws TuplesException {
-
return operand.getRowCount();
}
@@ -192,6 +191,10 @@
return operand.getRowUpperBound();
}
+ public long getRowExpectedCount() throws TuplesException {
+ return operand.getRowExpectedCount();
+ }
+
/**
* A column is unbound if it is unbound in the source of the projection.
*/
@@ -233,7 +236,7 @@
(getVariables().length > 0 && operand.isUnconstrained()));
}
- public List getOperands() {
+ public List<Tuples> getOperands() {
return Collections.singletonList(operand);
}
Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java 2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java 2009-07-10 16:25:25 UTC (rev 1754)
@@ -163,13 +163,15 @@
return tuples.getRowCount();
}
- public long getRowUpperBound() throws TuplesException
- {
+ public long getRowUpperBound() throws TuplesException {
return tuples.getRowUpperBound();
}
- public int getRowCardinality() throws TuplesException
- {
+ public long getRowExpectedCount() throws TuplesException {
+ return tuples.getRowExpectedCount();
+ }
+
+ public int getRowCardinality() throws TuplesException {
return tuples.getRowCardinality();
}
@@ -180,7 +182,6 @@
* @return The Variables value
*/
public Variable[] getVariables() {
-
return tuples.getVariables();
}
@@ -190,7 +191,6 @@
* @return The NumberOfVariables value
*/
public int getNumberOfVariables() {
-
return tuples.getNumberOfVariables();
}
@@ -198,8 +198,7 @@
* Delegates to the {@link Tuples#isColumnEverUnbound} method of the wrapped
* instance.
*/
- public boolean isColumnEverUnbound(int column) throws TuplesException
- {
+ public boolean isColumnEverUnbound(int column) throws TuplesException {
return tuples.isColumnEverUnbound(column);
}
More information about the Mulgara-svn
mailing list