[Mulgara-svn] r1754 - trunk/src/jar/tuples/java/org/mulgara/store/tuples

pag at mulgara.org pag at mulgara.org
Fri Jul 10 16:25:26 UTC 2009


Author: pag
Date: 2009-07-10 09:25:25 -0700 (Fri, 10 Jul 2009)
New Revision: 1754

Modified:
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java
   trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java
Log:
Added the getRowExpectedCount method to supplant getRowUpperBound, plus cleaned up warnings by removing unused imports and adding generics

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/AbstractTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -295,6 +295,8 @@
 
   public abstract long getRowUpperBound() throws TuplesException;
 
+  public abstract long getRowExpectedCount() throws TuplesException;
+
   public int getRowCardinality() throws TuplesException {
     if (rowCardinality != -1) return rowCardinality;
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/Assignment.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,8 +27,8 @@
 
 package org.mulgara.store.tuples;
 
+import java.util.Collections;
 import java.util.List;
-import java.util.ArrayList;
 
 // Third party packages
 import org.apache.log4j.*;
@@ -61,11 +61,9 @@
  */
 class Assignment extends AbstractTuples {
 
-  /**
-   * Logger.
-   */
-  private static Logger logger =
-      Logger.getLogger(Assignment.class.getName());
+  /** Logger. */
+  @SuppressWarnings("unused")
+  private static Logger logger = Logger.getLogger(Assignment.class.getName());
 
   /**
    * The value the variable is assigned.
@@ -251,6 +249,10 @@
     return getRowCount();
   }
 
+  public long getRowExpectedCount() {
+    return getRowCount();
+  }
+
   /**
    * METHOD TO DO
    *
@@ -279,8 +281,8 @@
     return true;
   }
 
-  public List getOperands() {
-    return new ArrayList(0);
+  public List<Tuples> getOperands() {
+    return Collections.emptyList();
   }
 
   public RowComparator getRowComparator() {

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/Difference.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -1,13 +1,16 @@
 /*
- * The contents of this file are subject to the Open Software License
- * Version 3.0 (the "License"); you may not use this file except in
- * compliance with the License. You may obtain a copy of the License at
- * http://www.opensource.org/licenses/osl-3.0.txt
+ * Copyright 2008 Fedora Commons
  *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 package org.mulgara.store.tuples;
@@ -37,10 +40,9 @@
  * ensuring the sort order of the subtrahend; that responsibility falls to
  * {@link TuplesOperations#subtract}.
  *
- * @created 2005-03
- * @author <a href="mailto:pgearon at users.sourceforge.net">Paul Gearon</a>
- * @copyright &copy; 2008 <A href="http://www.topazproject.org/">The Topaz Project</A>
- * @licence <a href="{@docRoot}/../../LICENCE">Open Software License v3.0</a>
+ * @created March, 2005
+ * @author Paul Gearon
+ * @licence <a href="{@docRoot}/../LICENCE.txt">Apache License, Version 2.0</a>
  */
 public class Difference extends AbstractTuples {
 
@@ -67,7 +69,6 @@
    * @throws IllegalArgumentException If the <var>minuend</var> and  <var>subtrahend</var>
    *         contain no variables in common.
    */
-  @SuppressWarnings("unchecked")
   Difference(Tuples minuend, Tuples subtrahend) throws TuplesException, IllegalArgumentException {
     // store the operands
     this.minuend = (Tuples)minuend.clone();
@@ -128,8 +129,31 @@
     return minuend.getRowUpperBound();
   }
 
+  /**
+   * This is a factor that we can expect the subtrahend to match on the minuend.
+   * 1.0 indicates that the subtrahend is a subset of the minuend. 0.0 indicates
+   * there is no match at all.
+   * TODO: update this value statistically, rather than using a constant value.
+   */
+  private static final double MATCH_RATIO = 0.25; 
 
   /**
+   * @return {@inheritDoc}  This is estimated as the size of the minuend,
+   * though it will probably be smaller.
+   * @throws TuplesException {@inheritDoc}
+   */
+  public long getRowExpectedCount() throws TuplesException {
+    long minCount = minuend.getRowExpectedCount();
+    long subCount = subtrahend.getRowExpectedCount();
+    long guess = minCount - (long)(MATCH_RATIO * subCount);
+    // if the guess is large enough (by some fudge factor), then we'll use it
+    if (guess > minCount / 2) return guess;
+
+    return (long)(minCount * MATCH_RATIO);
+  }
+
+
+  /**
    * {@inheritDoc}  Relies on the minuend of the difference.
    */
   public boolean isColumnEverUnbound(int column) throws TuplesException {

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/DistinctTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -143,7 +143,6 @@
    * @return The Comparator value
    */
   public RowComparator getComparator() {
-
     return operand.getComparator();
   }
 
@@ -151,6 +150,10 @@
     return operand.getRowUpperBound();
   }
 
+  public long getRowExpectedCount() throws TuplesException {
+    return operand.getRowExpectedCount();
+  }
+
   /**
    * Gets the Variables attribute of the DistinctTuples object
    *
@@ -187,7 +190,7 @@
     return operand.isUnconstrained();
   }
 
-  public List getOperands() {
+  public List<Tuples> getOperands() {
     return Collections.singletonList(operand);
   }
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/EmptyTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,7 +27,7 @@
 
 package org.mulgara.store.tuples;
 
-import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 // Locally written packages
@@ -131,6 +131,10 @@
     return 0;
   }
 
+  public long getRowExpectedCount() {
+    return 0;
+  }
+
   /**
    * Empty has no rows that can be unbound.
    *
@@ -155,8 +159,8 @@
     return true;
   }
 
-  public List getOperands() {
-    return new ArrayList(0);
+  public List<Tuples> getOperands() {
+    return Collections.emptyList();
   }
 
   /**

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/FilteredTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -94,6 +94,12 @@
 
 
   /** {@inheritDoc} */
+  public long getRowExpectedCount() throws TuplesException {
+    return (long)(unfiltered.getRowExpectedCount() * getMatchRatio());
+  }
+
+
+  /** {@inheritDoc} */
   public boolean isColumnEverUnbound(int column) throws TuplesException {
     return unfiltered.isColumnEverUnbound(column);
   }
@@ -248,4 +254,12 @@
     contextListeners.add(l);
   }
 
+  /**
+   * The expected ratio for matching on the filter. This value should update over time.
+   * TODO: calculate this value, update it over time, and record it against the filter pattern.
+   * @return A value between 1.0 (100% match) and 0.0 (no match).
+   */
+  private double getMatchRatio() {
+    return 0.5;
+  }
 }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LeftJoin.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -181,6 +181,13 @@
   }
 
 
+  /** {@inheritDoc} */
+  public long getRowExpectedCount() throws TuplesException {
+    // TODO: work out a better expected value. Maybe add about 10%
+    return lhs.getRowExpectedCount();
+  }
+
+
   /** {@inheritDoc}  Relies on the lhs of the optional. */
   public boolean isColumnEverUnbound(int column) throws TuplesException {
     int nrLeftVars = lhs.getNumberOfVariables();
@@ -709,5 +716,10 @@
       return wrapped.getRowUpperBound();
     }
     
+    /** @see org.mulgara.query.Cursor#getRowExpectedCount() */
+    public long getRowExpectedCount() throws TuplesException {
+      return wrapped.getRowExpectedCount();
+    }
+    
   }
 }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LetTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -133,6 +133,12 @@
 
 
   /** {@inheritDoc} */
+  public long getRowExpectedCount() throws TuplesException {
+    return innerTuples.getRowExpectedCount();
+  }
+
+
+  /** {@inheritDoc} */
   public boolean isColumnEverUnbound(int column) throws TuplesException {
     return column == lastCol || innerTuples.isColumnEverUnbound(column);
   }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LimitedTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -55,9 +55,8 @@
  */
 class LimitedTuples extends WrappedTuples {
 
-  /**
-   * Logger.
-   */
+  /** Logger. */
+  @SuppressWarnings("unused")
   private final static Logger logger = Logger.getLogger(LimitedTuples.class);
 
   /**
@@ -103,14 +102,23 @@
    * Gets the RowCount attribute of the WrappedTuples object
    *
    * @return The RowCount value
-   * @throws TuplesException EXCEPTION TO DO
+   * @throws TuplesException Error accessing underlying tuples.
    */
   public long getRowCount() throws TuplesException {
-
     long rowCount = tuples.getRowCount();
     return (rowCount < limit) ? rowCount : limit;
   }
 
+  public long getRowUpperBound() throws TuplesException {
+    long rowCount = tuples.getRowUpperBound();
+    return (rowCount < limit) ? rowCount : limit;
+  }
+
+  public long getRowExpectedCount() throws TuplesException {
+    long rowCount = tuples.getRowExpectedCount();
+    return (rowCount < limit) ? rowCount : limit;
+  }
+
   //
   // Methods implementing Tuples
   //

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/LiteralTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -175,6 +175,10 @@
     return getRowCount();
   }
 
+  public long getRowExpectedCount() throws TuplesException {
+    return getRowCount();
+  }
+
   public boolean isColumnEverUnbound(int column) throws TuplesException {
     return columnContainsUnbound[column];
   }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppend.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -201,6 +201,17 @@
     return bound;
   }
 
+  public long getRowExpectedCount() throws TuplesException {
+    long bound = 0;
+
+    for (int i = 0; i < operands.length; i++) {
+      bound += operands[i].getRowExpectedCount();
+      if (bound < 0) return Long.MAX_VALUE;
+    }
+
+    return bound;
+  }
+
   /**
    * A column is unbound if it's unbound in any of the operands.
    */

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedAppendUnitTest.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -59,9 +59,8 @@
  */
 public class OrderedAppendUnitTest extends TestCase {
 
-  /**
-   * Logger.
-   */
+  /** Logger. */
+  @SuppressWarnings("unused")
   private Logger logger = Logger.getLogger(OrderedAppendUnitTest.class.getName());
 
   /**
@@ -243,7 +242,6 @@
 
     Variable subject = StatementStore.VARIABLES[0];
     Variable predicate = StatementStore.VARIABLES[1];
-    Variable object = StatementStore.VARIABLES[2];
 
     Tuples lhs = new TestTuples(subject, 1).and(predicate, 8).or(subject, 3);
 
@@ -271,7 +269,7 @@
 
     newTuples.renameVariables(newConstraint);
 
-    assertEquals(eol + "{$x      $y      (unevaluated, 4 rows max)" + eol +
+    assertEquals(eol + "{$x      $y      (unevaluated, 4 rows max, 4 rows expected)" + eol +
         "[000001  000008  ]" + eol +
         "[000002  000006  ]" + eol +
         "[000003  000000  ]" + eol +

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/OrderedProjection.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -90,18 +90,15 @@
    *
    * @param operand the tuples to project
    * @param variables the columns to retain
-       * @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
-   * @throws TuplesException EXCEPTION TO DO
+   * @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
+   * @throws TuplesException Error while accessing the operand data.
    */
-  OrderedProjection(Tuples operand, Collection variables)
+  OrderedProjection(Tuples operand, Collection<Variable> variables)
       throws TuplesException {
 
     // Validate "operand" parameter
-    if (operand == null) {
+    if (operand == null) throw new IllegalArgumentException("Null \"operand\" parameter");
 
-      throw new IllegalArgumentException("Null \"operand\" parameter");
-    }
-
     // Validate "variables" parameter
     if (variables == null) {
 
@@ -110,28 +107,24 @@
 
     // Determine which columns haven't been projected away
     Variable[] operandVariables = operand.getVariables();
-    List columnMappingList = new ArrayList(operandVariables.length);
-    List variableList = new ArrayList(operandVariables.length);
+    List<Integer> columnMappingList = new ArrayList<Integer>(operandVariables.length);
+    List<Variable> variableList = new ArrayList<Variable>(operandVariables.length);
 
     for (int i = 0; i < operandVariables.length; i++) {
-
       if (variables.contains(operandVariables[i])) {
-
         variableList.add(operandVariables[i]);
         columnMappingList.add(new Integer(i));
       }
     }
 
     // Initialize fields
-    this.operand = (Tuples) operand.clone();
-    this.variables =
-        (Variable[]) variableList.toArray(new Variable[variableList.size()]);
+    this.operand = (Tuples)operand.clone();
+    this.variables = variableList.toArray(new Variable[variableList.size()]);
 
     // Create column mapping
     columnMapping = new int[columnMappingList.size()];
 
     for (int i = 0; i < columnMapping.length; i++) {
-
       columnMapping[i] = ( (Integer) columnMappingList.get(i)).intValue();
     }
 
@@ -190,7 +183,7 @@
    * Gets the RowCount attribute of the OrderedProjection object
    *
    * @return The RowCount value
-   * @throws TuplesException EXCEPTION TO DO
+   * @throws TuplesException Error on underlying data
    */
   public long getRowCount() throws TuplesException {
     return operand.getRowCount();
@@ -200,6 +193,10 @@
     return operand.getRowUpperBound();
   }
 
+  public long getRowExpectedCount() throws TuplesException {
+    return operand.getRowExpectedCount();
+  }
+
   /**
    * Gets the Variables attribute of the OrderedProjection object
    *
@@ -241,7 +238,7 @@
   }
 
 
-  public List getOperands() {
+  public List<Tuples> getOperands() {
     return Collections.singletonList(operand);
   }
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/SimpleTuplesFormat.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -107,11 +107,12 @@
         buffer.append("(");
         buffer.append(cloned.getRowCount());
         buffer.append(" rows)" + eol);
-      }
-      else {
+      } else {
         buffer.append("(unevaluated, ");
         buffer.append(cloned.getRowUpperBound());
-        buffer.append(" rows max)" + eol);
+        buffer.append(" rows max, ");
+        buffer.append(cloned.getRowExpectedCount());
+        buffer.append(" rows expected)").append(eol);
       }
 
       // Display rows

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/TestTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -69,22 +69,22 @@
   /**
    * Description of the Field
    */
-  private final List termList;
+  private final List<Map<Variable,Long>> termList;
 
   /**
    * Description of the Field
    */
-  private final List variableList;
+  private final List<Variable> variableList;
 
   /**
    * Description of the Field
    */
-  private Map rowMap = null;
+  private Map<Variable,Long> rowMap = null;
 
   /**
    * Description of the Field
    */
-  private Map lastMap = new HashMap();
+  private Map<Variable,Long> lastMap = new HashMap<Variable,Long>();
 
   /**
    * The number of columns in common for sort order.
@@ -98,8 +98,8 @@
    */
   public TestTuples() {
 
-    termList = new ArrayList();
-    variableList = new ArrayList();
+    termList = new ArrayList<Map<Variable,Long>>();
+    variableList = new ArrayList<Variable>();
   }
 
   /**
@@ -196,11 +196,11 @@
     return true;
   }
 
-  public List getOperands() {
+  public List<Tuples> getOperands() {
     if (tuples != null) {
       return Collections.singletonList(tuples);
     } else {
-      return new ArrayList(0);
+      return Collections.emptyList();
     }
   }
 
@@ -234,19 +234,20 @@
    * @return The RowCount value
    */
   public long getRowCount() {
-
     return termList.size();
   }
 
   public long getRowUpperBound() {
+    return getRowCount();
+  }
 
+  public long getRowExpectedCount() {
     return getRowCount();
   }
 
   /**
   * This method isn't really implemented; it just assumes all columns might be
   * unbound.
-  *
   * @return <code>true</code>
   */
   public boolean isColumnEverUnbound(int column) throws TuplesException {
@@ -393,7 +394,7 @@
    */
   private TestTuples or() {
 
-    lastMap = new HashMap();
+    lastMap = new HashMap<Variable,Long>();
     termList.add(lastMap);
 
     return this;
@@ -410,14 +411,11 @@
     assert row <= termList.size();
 
     if (row == termList.size()) {
-
       rowMap = null;
       return false;
-    }
-    else {
-
+    } else {
       assert row < termList.size();
-      rowMap = (Map) termList.get(row);
+      rowMap = (Map<Variable,Long>) termList.get(row);
       row++;
 
       return true;

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/TuplesOperations.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -665,9 +665,9 @@
       double weightedRowCount = 0.0;
       for (int weight = 0; weight < numLeftBindings + 1; weight++) {
         double term = vars.length > 0
-                        ? Math.pow(tuples.getRowUpperBound(), (double)(vars.length - (numLeftBindings - weight)) / vars.length)
-                        : tuples.getRowUpperBound();
-        weightedRowCount += term / Math.pow(100.0, weight);
+                        ? Math.pow(tuples.getRowExpectedCount(), (double)(vars.length - (numLeftBindings - weight)) / vars.length)
+                        : tuples.getRowExpectedCount();
+        weightedRowCount += term / Math.pow(10.0, weight);
       }
 
       if (logger.isDebugEnabled()) {
@@ -1018,7 +1018,9 @@
     else buff.append("=");
 
     try {
-      buff.append(tuples.getRowUpperBound()).append("]");
+      buff.append(tuples.getRowUpperBound());
+      buff.append(" (~").append(tuples.getRowExpectedCount());
+      buff.append(")]");
     } catch (TuplesException et) {
       buff.append(et.toString()).append("]");
     }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoin.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -30,6 +30,7 @@
 package org.mulgara.store.tuples;
 
 // Java 2 standard packages
+import java.math.BigInteger;
 import java.util.*;
 
 // Third party packages
@@ -161,6 +162,11 @@
   protected long[] prefix = null;
 
   /**
+   * The variable groups formed in this operation. If more than one there will be a cartesian product in the result.
+   */
+  protected List<VarGroup> varGroups = null;
+
+  /**
    * Conjoin a list of propositions.
    *
    * @param operands the propositions to conjoin; the order affects efficiency,
@@ -185,7 +191,7 @@
       if (logger.isDebugEnabled()) {
         logger.debug("Operands " + i + " : " + operands[i]);
         logger.debug("Operands variables " + i + " : " + Arrays.asList(operands[i].getVariables()));
-        logger.debug("Ooperands types " + i + " : " + operands[i].getClass());
+        logger.debug("Operands types " + i + " : " + operands[i].getClass());
       }
       operandBinding[i] = new long[operands[i].getVariables().length];
       if (!operands[i].hasNoDuplicates()) {
@@ -268,6 +274,8 @@
         }
       }
     }
+
+    buildVarGroups();
   }
 
   /**
@@ -349,29 +357,52 @@
    * @throws TuplesException {@inheritDoc}
    */
   public long getRowUpperBound() throws TuplesException {
-    if (operands.length == 0) {
-      return 0;
+    if (operands.length == 0) return 0;
+    if (operands.length == 1) return operands[0].getRowUpperBound();
+
+    BigInteger rowCount = BigInteger.valueOf(operands[0].getRowUpperBound());
+
+    for (int i = 1; i < operands.length; i++) {
+      rowCount = rowCount.multiply(BigInteger.valueOf(operands[i].getRowUpperBound()));
+      if (rowCount.bitLength() > 63)
+        return Long.MAX_VALUE;
     }
-    if (operands.length == 1) {
-      return operands[0].getRowUpperBound();
-    }
 
-    /*
-      BigInteger rowCount = BigInteger.valueOf(operands[0].getRowUpperBound());
-  
+    return rowCount.longValue();
+  }
+
+  /**
+   * @return {@inheritDoc}  This is estimated as the size of the minumum
+   *         of the row counts of all the {@link #operands}.
+   * @throws TuplesException {@inheritDoc}
+   */
+  public long getRowExpectedCount() throws TuplesException {
+    if (operands.length == 0) return 0;
+    if (operands.length == 1) return operands[0].getRowExpectedCount();
+
+    // simple joined group. Get the minimum as a guess.
+    if (varGroups.size() == 1) {
+      long result = operands[0].getRowExpectedCount();
       for (int i = 1; i < operands.length; i++) {
-        rowCount = rowCount.multiply(BigInteger.valueOf(operands[i].getRowUpperBound()));
-        if (rowCount.bitLength() > 63)
-          return Long.MAX_VALUE;
+        result = Math.min(result, operands[i].getRowExpectedCount());
       }
-  
+      return result;
+    } else {
+      // cartesian product. Get the simple joins, and multiply.
+      BigInteger rowCount = null;
+      for (VarGroup vg: varGroups) {
+        // calculate the size of this group
+        List<Integer> ops = vg.getOps();
+        long groupResult = operands[ops.get(0)].getRowExpectedCount();
+        for (int i = 1; i < ops.size(); i++) {
+          groupResult = Math.min(groupResult, operands[ops.get(i)].getRowExpectedCount());
+        }
+        // merge the current group into the running total
+        if (rowCount == null) rowCount = BigInteger.valueOf(groupResult);
+        else rowCount = rowCount.multiply(BigInteger.valueOf(groupResult));
+      }
       return rowCount.longValue();
-      */
-    long result = operands[0].getRowUpperBound();
-    for (int i = 1; i < operands.length; i++) {
-      result = Math.min(result, operands[i].getRowUpperBound());
     }
-    return result;
   }
 
   public boolean isColumnEverUnbound(int column) throws TuplesException {
@@ -459,6 +490,15 @@
     return cloned;
   }
 
+  /**
+   * Get the number of groups in this join, based on their shared variables.
+   * This indicates the number of cartesian products required.
+   * @return The number of variable groups discovered between the operands
+   */
+  int getNrGroups() {
+    return varGroups.size();
+  }
+
   //
   // Internal methods
   //
@@ -553,4 +593,122 @@
       return true;
     }
   }
+
+
+  /**
+   * Creates the groupings of variables formed during this join.
+   * An inner join will form if there is just one group.
+   */
+  void buildVarGroups() {
+    varGroups = new LinkedList<VarGroup>();
+
+    // go over all the operands
+    G: for (int i = 0; i < operands.length; i++) {
+      Variable[] vars = operands[i].getVariables();
+      // test if any group already matches this operand
+      for (VarGroup v: varGroups) {
+        if (v.joinsTo(vars)) {
+          // found a match, so add it
+          v.addOperand(i);
+          // this may join in other groups, so test
+          Iterator<VarGroup> vgi = varGroups.iterator();
+          while (vgi.hasNext()) {
+            VarGroup ov = vgi.next();
+            // don't test if this group joins to itself
+            if (ov == v) continue;
+            if (v.joinsTo(ov)) {
+              // groups join, so merge them
+              v.merge(ov);
+              vgi.remove();
+            }
+          }
+          // we've matched this operand in, so move to the next operand
+          continue G;
+        }
+      }
+      // no matches, so create a new group
+      varGroups.add(new VarGroup(i));
+    }
+  }
+
+
+  /**
+   * A class to record a group of variables and the operands they are associated with.
+   */
+  class VarGroup {
+    /** The variables for the group */
+    HashSet<Variable> variables = new HashSet<Variable>();
+
+    /** The operands this group's variables can be found in */
+    ArrayList<Integer> opList = new ArrayList<Integer>();
+
+    /**
+     * Create a group, starting with a given operand.
+     * @param opIndex The index of the operand to seed the group with.
+     */
+    public VarGroup(int opIndex) {
+      addOperand(opIndex);
+    }
+
+
+    /**
+     * Adds a new operand's variables to the group.
+     * @param i The index of the operand to add.
+     */
+    public void addOperand(int i) {
+      assert !opList.contains(operands[i]);
+      opList.add(i);
+      for (Variable v: operands[i].getVariables()) {
+        variables.add(v);
+      }
+    }
+
+
+    /**
+     * Adds another group to this one, based on shared variables.
+     * @param v The other variable group to merge.
+     */
+    @SuppressWarnings("unchecked")
+    public void merge(VarGroup v) {
+      // check that some variables are shared
+      assert ((HashSet<Variable>)variables.clone()).removeAll(v.variables);
+      // check that no operands are shared
+      assert !((ArrayList<Integer>)opList.clone()).removeAll(v.opList);
+      variables.addAll(v.variables);
+      opList.addAll(v.opList);
+    }
+
+
+    /**
+     * Tests if this group joins to a given set of variables.
+     * @param vars An array of variables to test.
+     * @return <code>true</code> if the variables join to this group.
+     */
+    public boolean joinsTo(Variable[] vars) {
+      for (Variable v: vars) {
+        if (variables.contains(v)) return true;
+      }
+      return false;
+    }
+
+
+    /**
+     * Tests if this group joins to another group.
+     * @param og The other group.
+     * @return <code>true</code> if the groups share variables.
+     */
+    @SuppressWarnings("unchecked")
+    public boolean joinsTo(VarGroup og) {
+      return ((HashSet<Variable>)variables.clone()).removeAll(og.variables);
+    }
+
+
+    /**
+     * Get the list of operands for this group.
+     * @return The operand list.
+     */
+    public List<Integer> getOps() {
+      return opList;
+    }
+  }
 }

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnboundJoinUnitTest.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -32,7 +32,6 @@
 import org.apache.log4j.Logger; // Log4J
 
 // Locally written packages
-import org.mulgara.query.TuplesException;
 import org.mulgara.query.Variable;
 
 /**
@@ -171,6 +170,7 @@
 
     Tuples joinedTuples = new UnboundJoin(new Tuples[] {});
     assertTrue(joinedTuples.isUnconstrained());
+    assertEquals(0, ((UnboundJoin)joinedTuples).getNrGroups());
   }
 
   /**
@@ -183,12 +183,11 @@
 
     Tuples operand =
         TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
-        .or(x, 3).and(y,
-        4));
+        .or(x, 3).and(y, 4));
     assert operand != null;
 
-    Tuples joinedTuples = new UnboundJoin(new Tuples[] {
-        operand});
+    Tuples joinedTuples = new UnboundJoin(new Tuples[] {operand});
+    assertEquals(1, ((UnboundJoin)joinedTuples).getNrGroups());
 
     joinedTuples.beforeFirst();
 
@@ -210,16 +209,14 @@
 
     Tuples lhs =
         TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
-        .or(x, 3).and(y,
-        4));
+        .or(x, 3).and(y, 4));
 
     Tuples rhs =
         TuplesFactory.newInstance().newTuples(new TestTuples(z, 5).and(w, 6)
-        .or(z, 7).and(w,
-        8));
+        .or(z, 7).and(w, 8));
 
-    Tuples joined = new UnboundJoin(new Tuples[] {
-        lhs, rhs});
+    Tuples joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+    assertEquals(2, ((UnboundJoin)joined).getNrGroups());
 
     Variable[] variables = joined.getVariables();
 
@@ -249,21 +246,18 @@
 
     Tuples lhs =
         TuplesFactory.newInstance().newTuples(new TestTuples(x, 1).and(y, 2)
-        .or(x, 3).and(y,
-        4));
+        .or(x, 3).and(y, 4));
 
     Tuples mhs =
         TuplesFactory.newInstance().newTuples(new TestTuples(z, 5).and(w, 6)
-        .or(z, 7).and(w,
-        8));
+        .or(z, 7).and(w, 8));
 
     Tuples rhs =
         TuplesFactory.newInstance().newTuples(new TestTuples(u, 9).and(v, 10)
-        .or(u, 11).and(v,
-        12));
+        .or(u, 11).and(v, 12));
 
-    Tuples joined = new UnboundJoin(new Tuples[] {
-        lhs, mhs, rhs});
+    Tuples joined = new UnboundJoin(new Tuples[] {lhs, mhs, rhs});
+    assertEquals(3, ((UnboundJoin)joined).getNrGroups());
 
     Variable[] variables = joined.getVariables();
     assertEquals(6, variables.length);
@@ -317,11 +311,11 @@
     rhs.appendTuple(new long[] {4, 6});
 
     TuplesFactory.newInstance().newTuples(new TestTuples(y, 2).and(z, 5)
-        .or(y, 4).and(z,
-        6));
+        .or(y, 4).and(z, 6));
 
-    Tuples actual = TuplesOperations.sort(new UnboundJoin(new Tuples[] {lhs,
-        rhs}));
+    UnboundJoin joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+    assertEquals(1, joined.getNrGroups());
+    Tuples actual = TuplesOperations.sort(joined);
 
     // First, try a straightforward iteration through all rows
     actual.beforeFirst();
@@ -606,7 +600,9 @@
     LiteralTuples lhs = LiteralTuples.create(lvars, lhsValues);
     LiteralTuples rhs = LiteralTuples.create(rvars, rhsValues);
 
-    Tuples actual = TuplesOperations.sort(new UnboundJoin(new Tuples[] {lhs, rhs}));
+    UnboundJoin joined = new UnboundJoin(new Tuples[] {lhs, rhs});
+    assertEquals(1, joined.getNrGroups());
+    Tuples actual = TuplesOperations.sort(joined);
 
     logger.warn("testPartialMGR36 result = " + actual);
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnconstrainedTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -27,8 +27,8 @@
 
 package org.mulgara.store.tuples;
 
+import java.util.Collections;
 import java.util.List;
-import java.util.ArrayList;
 
 // Locally written packages
 import org.mulgara.query.TuplesException;
@@ -134,6 +134,10 @@
     return getRowCount();
   }
 
+  public long getRowExpectedCount() {
+    return getRowCount();
+  }
+
   /**
    * Unconstrained has no columns that can be unbound.
    *
@@ -157,8 +161,8 @@
     return true;
   }
 
-  public List getOperands() {
-    return new ArrayList(0);
+  public List<Tuples> getOperands() {
+    return Collections.emptyList();
   }
 
   /**

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedAppend.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -61,9 +61,8 @@
  */
 class UnorderedAppend extends AbstractTuples {
 
-  /**
-   * Logger.
-   */
+  /** Logger. */
+  @SuppressWarnings("unused")
   private final static Logger logger = Logger.getLogger(UnorderedAppend.class);
 
   /**
@@ -164,8 +163,7 @@
 
     for (int i = 0; i < operands.length; i++) {
       rowCount += operands[i].getRowCount();
-      if (rowCount < 0)
-        return Long.MAX_VALUE;
+      if (rowCount < 0) return Long.MAX_VALUE;
     }
 
     return rowCount;
@@ -176,15 +174,24 @@
 
     for (int i = 0; i < operands.length; i++) {
       bound += operands[i].getRowUpperBound();
-      if (bound < 0)
-        return Long.MAX_VALUE;
+      if (bound < 0) return Long.MAX_VALUE;
     }
 
     return bound;
   }
 
-  public boolean isColumnEverUnbound(int column) throws TuplesException
-  {
+  public long getRowExpectedCount() throws TuplesException {
+    long bound = 0;
+
+    for (int i = 0; i < operands.length; i++) {
+      bound += operands[i].getRowExpectedCount();
+      if (bound < 0) return Long.MAX_VALUE;
+    }
+
+    return bound;
+  }
+
+  public boolean isColumnEverUnbound(int column) throws TuplesException {
     assert(operand >= 0) || (operand < operands.length):"No column " +
         column;
 
@@ -219,7 +226,7 @@
   }
 
 
-  public List getOperands() {
+  public List<Tuples> getOperands() {
     return Arrays.asList(operands);
   }
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/UnorderedProjection.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -94,7 +94,7 @@
    * @throws IllegalArgumentException if <var>operand</var> is <code>null</code>
    * @throws TuplesException EXCEPTION TO DO
    */
-  UnorderedProjection(Tuples operand, List columnList) throws TuplesException {
+  UnorderedProjection(Tuples operand, List<Variable> columnList) throws TuplesException {
 
     // Validate "operand" parameter
     if (operand == null) {
@@ -111,7 +111,7 @@
     }
 
     // Initialize fields
-    this.operand = (Tuples) operand.clone();
+    this.operand = (Tuples)operand.clone();
     setVariables(columnList);
 
     // Create column mapping
@@ -181,10 +181,9 @@
    * Gets the RowCount attribute of the UnorderedProjection object
    *
    * @return The RowCount value
-   * @throws TuplesException EXCEPTION TO DO
+   * @throws TuplesException Error accessing underlying data
    */
   public long getRowCount() throws TuplesException {
-
     return operand.getRowCount();
   }
 
@@ -192,6 +191,10 @@
     return operand.getRowUpperBound();
   }
 
+  public long getRowExpectedCount() throws TuplesException {
+    return operand.getRowExpectedCount();
+  }
+
   /**
    * A column is unbound if it is unbound in the source of the projection.
    */
@@ -233,7 +236,7 @@
         (getVariables().length > 0 && operand.isUnconstrained()));
   }
 
-  public List getOperands() {
+  public List<Tuples> getOperands() {
     return Collections.singletonList(operand);
   }
 

Modified: trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java
===================================================================
--- trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java	2009-07-06 22:40:35 UTC (rev 1753)
+++ trunk/src/jar/tuples/java/org/mulgara/store/tuples/WrappedTuples.java	2009-07-10 16:25:25 UTC (rev 1754)
@@ -163,13 +163,15 @@
     return tuples.getRowCount();
   }
 
-  public long getRowUpperBound() throws TuplesException
-  {
+  public long getRowUpperBound() throws TuplesException {
     return tuples.getRowUpperBound();
   }
 
-  public int getRowCardinality() throws TuplesException
-  {
+  public long getRowExpectedCount() throws TuplesException {
+    return tuples.getRowExpectedCount();
+  }
+
+  public int getRowCardinality() throws TuplesException {
     return tuples.getRowCardinality();
   }
 
@@ -180,7 +182,6 @@
    * @return The Variables value
    */
   public Variable[] getVariables() {
-
     return tuples.getVariables();
   }
 
@@ -190,7 +191,6 @@
    * @return The NumberOfVariables value
    */
   public int getNumberOfVariables() {
-
     return tuples.getNumberOfVariables();
   }
 
@@ -198,8 +198,7 @@
    * Delegates to the {@link Tuples#isColumnEverUnbound} method of the wrapped
    * instance.
    */
-  public boolean isColumnEverUnbound(int column) throws TuplesException
-  {
+  public boolean isColumnEverUnbound(int column) throws TuplesException {
     return tuples.isColumnEverUnbound(column);
   }
 




More information about the Mulgara-svn mailing list