Staging
v0.5.0
https://repo1.maven.org/maven2/org/prefuse/prefuse
Raw File
FilterIteratorFactory.java
package prefuse.data.util;

import java.util.Comparator;
import java.util.Iterator;

import prefuse.data.Table;
import prefuse.data.expression.AndPredicate;
import prefuse.data.expression.ColumnExpression;
import prefuse.data.expression.ComparisonPredicate;
import prefuse.data.expression.Expression;
import prefuse.data.expression.ExpressionAnalyzer;
import prefuse.data.expression.NotPredicate;
import prefuse.data.expression.OrPredicate;
import prefuse.data.expression.Predicate;
import prefuse.data.expression.RangePredicate;
import prefuse.data.tuple.TupleSet;
import prefuse.util.PrefuseConfig;
import prefuse.util.collections.CompositeIntIterator;
import prefuse.util.collections.IntIterator;

/**
 * Factory class that creates optimized filter iterators. When possible,
 * this factory will attempt to create an optimized query plan by using
 * available indexes, in many incrasing performance by only visiting
 * the tuples which will pass the filter condition.
 * 
 * @author <a href="http://jheer.org">jeffrey heer</a>
 */
public class FilterIteratorFactory {

    private static final int OPTIMIZATION_THRESHOLD
        = PrefuseConfig.getInt("data.filter.optimizeThreshold");
    
    // we can stash our query plan generation and optimization here to deal 
    // with it all in one spot, and keep the rest of the classes clean
    
    /**
     * Get a filtered iterator over the tuples in the given set,
     * filtered by the given predicate.
     * @param ts the TupleSet to iterate over
     * @param p the filter predicate
     * @return a filtered iterator over the tuples
     */
    public static Iterator tuples(TupleSet ts, Predicate p) {
        // no predicate means no filtering
        if ( p == null )
            return ts.tuples();
        
        // attempt to generate an optimized query plan
        Iterator iter = null;
        if ( ts instanceof Table ) {
            Table t = (Table)ts;
            IntIterator ii = getOptimizedIterator(t,p);
            if ( ii != null )
                iter = t.tuples(ii);
        }
        
        // optimization fails, scan the entire table
        if ( iter == null ) {
            iter = new FilterIterator(ts.tuples(), p);
        }
        
        return iter;
    }
    
    /**
     * Get a filtered iterator over the rows in the given table,
     * filtered by the given predicate.
     * @param t the Table to iterate over
     * @param p the filter predicate
     * @return a filtered iterator over the table rows
     */
    public static IntIterator rows(Table t, Predicate p) {
        // attempt to generate an optimized query plan
        IntIterator iter = null;
        iter = getOptimizedIterator(t, p);
        
        // optimization fails, scan the entire table
        if ( iter == null ) {
            iter = new FilterRowIterator(t.rows(), t, p);
        }
        return iter;
    }
    
    /**
     * Get an optimized iterator over the rows of a table, if possible.
     * @param t the Table to iterator over
     * @param p the filter predicate
     * @return an optimized iterator, or null if no optimization was found
     */
    protected static IntIterator getOptimizedIterator(Table t, Predicate p) {
        if ( t.getRowCount() < OPTIMIZATION_THRESHOLD ) 
            return null; // avoid overhead for small tables
        
        if ( p instanceof ColumnExpression ) {
            // try to optimize a boolean column
            return getColumnIterator(t,
                    ((ColumnExpression)p).getColumnName(), true);
        }
        else if ( p instanceof NotPredicate )
        {
            // try to optimize the negation a boolean column
            Predicate pp = ((NotPredicate)p).getPredicate();
            if ( pp instanceof ColumnExpression ) {
                return getColumnIterator(t,
                        ((ColumnExpression)pp).getColumnName(), false);
            }
        }
        else if ( p instanceof AndPredicate )
        {
            // try to optimize an and clause
            return getAndIterator(t, (AndPredicate)p);
        }
        else if ( p instanceof OrPredicate )
        {
            // try to optimize an or clause
            return getOrIterator(t, (OrPredicate)p);
        }
        else if ( p instanceof ComparisonPredicate )
        {
            // try to optimize a comparison (=, !=, <, > ,etc)
            return getComparisonIterator(t,(ComparisonPredicate)p);
        }
        else if ( p instanceof RangePredicate )
        {
            // try to optimize a bounded range of values
            return getRangeIterator(t, (RangePredicate)p); 
        }
        
        return null;
    }
    
    protected static IntIterator getColumnIterator(
            Table t, String field, boolean val)
    {
        if ( t.getColumnType(field) != boolean.class )
            return null; // only works for boolean-valued columns
        
        Index index = t.getIndex(field);
        if ( index == null ) {
            return null;
        } else {
            return index.rows(val);
        }
    }
    
    protected static IntIterator getOrIterator(Table t, OrPredicate op) {
        int size = op.size();
        if ( size > 1 ) {
            // if all subclauses can be optimized, we can optimize the query
            IntIterator[] rows = new IntIterator[size];
            for ( int i=0; i<rows.length; ++i ) {
                rows[i] = getOptimizedIterator(t, op.get(i));
                
                // all clauses must be optimized to avoid linear scan
                if ( rows[i] == null ) return null;
            }
            // group iterators, and filter for uniqueness
            return new UniqueRowIterator(new CompositeIntIterator(rows));
        } else if ( size == 1 ) {
            // only one clause, optimize for that
            return getOptimizedIterator(t, op.get(0));
        } else {
            // no woman, no cry
            return null;
        }
    }
    
    protected static IntIterator getAndIterator(Table t, AndPredicate ap) {
        // possible TODO: add scoring to select best optimized iterator
        // for now just work from the end backwards and take the first
        // optimized iterator we find
        IntIterator rows = null;
        Predicate clause = null;
        for ( int i=ap.size(); --i >= 0; ) {
            clause = ap.get(i);
            if ( (rows=getOptimizedIterator(t,clause)) != null )
                break;
        }
        
        // exit if we didn't optimize
        if ( rows == null ) return null;
        
        // if only one clause, no extras needed
        if ( ap.size() == 1 ) return rows;
        
        // otherwise get optimized source, run through other clauses
        return new FilterRowIterator(rows, t, ap.getSubPredicate(clause));
    }
    
    protected static IntIterator getComparisonIterator(Table t, 
                                           ComparisonPredicate cp)
    {
        Expression l = cp.getLeftExpression();
        Expression r = cp.getRightExpression();
        int operation = cp.getOperation();
        
        // not equals operations aren't handled by the index
        if ( operation == ComparisonPredicate.NEQ )
            return null;
        
        ColumnExpression col;
        Expression lit;
        
        // make sure columns are of the right type
        if (l instanceof ColumnExpression && 
                !ExpressionAnalyzer.hasDependency(r))
        {
            col = (ColumnExpression)l;
            lit = r;
        } else if (r instanceof ColumnExpression &&
                !ExpressionAnalyzer.hasDependency(l))
        {
            col = (ColumnExpression)r;
            lit = l;
        } else {
            return null;
        }
        
        // if table has index of the right type, use it
        Comparator cmp = cp.getComparator();
        Index index = t.getIndex(col.getColumnName());
        
        if ( index == null || !cmp.equals(index.getComparator()) )
            return null;
        
        Class ltype = lit.getClass();
        if ( ltype == int.class ) {
            int val = lit.getInt(null); // literal value, so null is safe
            switch ( operation ) {
            case ComparisonPredicate.LT:
                return index.rows(Integer.MIN_VALUE, val, Index.TYPE_AIE);
            case ComparisonPredicate.GT:
                return index.rows(val, Integer.MAX_VALUE, Index.TYPE_AEI);
            case ComparisonPredicate.EQ:
                return index.rows(val, val, Index.TYPE_AII);
            case ComparisonPredicate.LTEQ:
                return index.rows(Integer.MIN_VALUE, val, Index.TYPE_AII);
            case ComparisonPredicate.GTEQ:
                return index.rows(val, Integer.MAX_VALUE, Index.TYPE_AII);
            default:
                throw new IllegalStateException(); // should never occur
            }
        } else if ( ltype == long.class ) {
            long val = lit.getLong(null); // literal value, so null is safe
            switch ( operation ) {
            case ComparisonPredicate.LT:
                return index.rows(Long.MIN_VALUE, val, Index.TYPE_AIE);
            case ComparisonPredicate.GT:
                return index.rows(val, Long.MAX_VALUE, Index.TYPE_AEI);
            case ComparisonPredicate.EQ:
                return index.rows(val, val, Index.TYPE_AII);
            case ComparisonPredicate.LTEQ:
                return index.rows(Long.MIN_VALUE, val, Index.TYPE_AII);
            case ComparisonPredicate.GTEQ:
                return index.rows(val, Long.MAX_VALUE, Index.TYPE_AII);
            default:
                throw new IllegalStateException(); // should never occur
            }
        } else if ( ltype == float.class ) {
            float val = lit.getFloat(null); // literal value, so null is safe
            switch ( operation ) {
            case ComparisonPredicate.LT:
                return index.rows(Float.MIN_VALUE, val, Index.TYPE_AIE);
            case ComparisonPredicate.GT:
                return index.rows(val, Float.MAX_VALUE, Index.TYPE_AEI);
            case ComparisonPredicate.EQ:
                return index.rows(val, val, Index.TYPE_AII);
            case ComparisonPredicate.LTEQ:
                return index.rows(Float.MIN_VALUE, val, Index.TYPE_AII);
            case ComparisonPredicate.GTEQ:
                return index.rows(val, Float.MAX_VALUE, Index.TYPE_AII);
            default:
                throw new IllegalStateException(); // should never occur
            }
        } else if ( ltype == double.class ) {
            double val = lit.getDouble(null); // literal value, so null is safe
            switch ( operation ) {
            case ComparisonPredicate.LT:
                return index.rows(Double.MIN_VALUE, val, Index.TYPE_AIE);
            case ComparisonPredicate.GT:
                return index.rows(val, Double.MAX_VALUE, Index.TYPE_AEI);
            case ComparisonPredicate.EQ:
                return index.rows(val, val, Index.TYPE_AII);
            case ComparisonPredicate.LTEQ:
                return index.rows(Double.MIN_VALUE, val, Index.TYPE_AII);
            case ComparisonPredicate.GTEQ:
                return index.rows(val, Double.MAX_VALUE, Index.TYPE_AII);
            default:
                throw new IllegalStateException(); // should never occur
            }
        } else {
            Object val = lit.get(null); // literal value, so null is safe
            switch ( operation ) {
            case ComparisonPredicate.LT:
                return index.rows(null, val, Index.TYPE_AIE);
            case ComparisonPredicate.GT:
                return index.rows(val, null, Index.TYPE_AEI);
            case ComparisonPredicate.EQ:
                return index.rows(val, val, Index.TYPE_AII);
            case ComparisonPredicate.LTEQ:
                return index.rows(null, val, Index.TYPE_AII);
            case ComparisonPredicate.GTEQ:
                return index.rows(val, null, Index.TYPE_AII);
            default:
                throw new IllegalStateException(); // should never occur
            }
        }        
    }
    
    protected static IntIterator getRangeIterator(Table t, RangePredicate rp) {
        ColumnExpression col;
        Expression l, r;
        
        // make sure columns are of the right type
        if ( !(rp.getMiddleExpression() instanceof ColumnExpression) ||
                ExpressionAnalyzer.hasDependency(rp.getLeftExpression()) ||
                ExpressionAnalyzer.hasDependency(rp.getRightExpression()) )
        {
            return null;
        }
        
        // assign variables
        col = (ColumnExpression)rp.getMiddleExpression();
        l = rp.getLeftExpression();
        r = rp.getRightExpression();
        
        // if table has index of the right type, use it
        Comparator cmp = rp.getComparator();
        Index index = t.getIndex(col.getColumnName());
        
        if ( index == null || !cmp.equals(index.getComparator()) )
            return null;
        
        int operation = rp.getOperation();
        Class ltype = t.getColumnType(col.getColumnName());
        
        // TODO safety check literal types
        
        // get the index type
        int indexType;
        switch ( operation ) {
        case RangePredicate.IN_IN:
            indexType = Index.TYPE_AII;
            break;
        case RangePredicate.IN_EX:
            indexType = Index.TYPE_AIE;
            break;
        case RangePredicate.EX_IN:
            indexType = Index.TYPE_AEI;
            break;
        case RangePredicate.EX_EX:
            indexType = Index.TYPE_AEE;
            break;
        default:
            throw new IllegalStateException(); // should never occur
        }
        
        // get the indexed rows
        if ( ltype == int.class ) {
            return index.rows(l.getInt(null), r.getInt(null), indexType);
        } else if ( ltype == long.class ) {
            return index.rows(l.getLong(null), r.getLong(null), indexType);
        } else if ( ltype == float.class ) {
            return index.rows(l.getFloat(null), r.getFloat(null), indexType);
        } else if ( ltype == double.class ) {
            return index.rows(l.getDouble(null), r.getDouble(null), indexType);
        } else {
            return index.rows(l.get(null), r.get(null), indexType);
        }
    }
    
} // end of class FilterIteratorFactory
back to top