Content-Length: 644460 | pFad | http://github.com/namrathamyske/iceberg/commit/0955a6d085fb02743ff937615dcfa2c7a6f3d4df

B1 API: Add expression equivalence testing (#4947) · namrathamyske/iceberg@0955a6d · GitHub
Skip to content

Commit

Permalink
API: Add expression equivalence testing (apache#4947)
Browse files Browse the repository at this point in the history
  • Loading branch information
rdblue authored and nkeshavaprakash committed Jul 10, 2022
1 parent 32865ec commit 0955a6d
Show file tree
Hide file tree
Showing 14 changed files with 385 additions and 1 deletion.
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/And.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ public Operation op() {
return Expression.Operation.AND;
}

@Override
public boolean isEquivalentTo(Expression expr) {
if (expr.op() == Operation.AND) {
And other = (And) expr;
return (left.isEquivalentTo(other.left()) && right.isEquivalentTo(other.right())) ||
(left.isEquivalentTo(other.right()) && right.isEquivalentTo(other.left()));
}

return false;
}

@Override
public Expression negate() {
// not(and(a, b)) => or(not(a), not(b))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,19 @@
package org.apache.iceberg.expressions;

import java.util.Comparator;
import java.util.Set;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;

public class BoundLiteralPredicate<T> extends BoundPredicate<T> {
private static final Set<Type.TypeID> INTEGRAL_TYPES = Sets.newHashSet(
Type.TypeID.INTEGER, Type.TypeID.LONG, Type.TypeID.DATE, Type.TypeID.TIME, Type.TypeID.TIMESTAMP);

private static long toLong(Literal<?> lit) {
return ((Number) lit.value()).longValue();
}

private final Literal<T> literal;

BoundLiteralPredicate(Operation op, BoundTerm<T> term, Literal<T> lit) {
Expand Down Expand Up @@ -76,6 +86,53 @@ public boolean test(T value) {
}
}

@Override
@SuppressWarnings("unchecked")
public boolean isEquivalentTo(Expression expr) {
if (op() == expr.op()) {
BoundLiteralPredicate<?> other = (BoundLiteralPredicate<?>) expr;
if (term().isEquivalentTo(other.term())) {
// because the term is equivalent, the literal must have the same type, T
Literal<T> otherLiteral = (Literal<T>) other.literal();
Comparator<T> cmp = literal.comparator();
return cmp.compare(literal.value(), otherLiteral.value()) == 0;
}

} else if (expr instanceof BoundLiteralPredicate) {
BoundLiteralPredicate<?> other = (BoundLiteralPredicate<?>) expr;
if (INTEGRAL_TYPES.contains(term().type().typeId()) && term().isEquivalentTo(other.term())) {
switch (op()) {
case LT:
if (other.op() == Operation.LT_EQ) {
// < 6 is equivalent to <= 5
return toLong(literal()) == toLong(other.literal()) + 1L;
}
break;
case LT_EQ:
if (other.op() == Operation.LT) {
// <= 5 is equivalent to < 6
return toLong(literal()) == toLong(other.literal()) - 1L;
}
break;
case GT:
if (other.op() == Operation.GT_EQ) {
// > 5 is equivalent to >= 6
return toLong(literal()) == toLong(other.literal()) - 1L;
}
break;
case GT_EQ:
if (other.op() == Operation.GT) {
// >= 5 is equivalent to > 4
return toLong(literal()) == toLong(other.literal()) + 1L;
}
break;
}
}
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ public Type type() {
return field.type();
}

@Override
public boolean isEquivalentTo(BoundTerm<?> other) {
if (other instanceof BoundReference) {
Types.NestedField otherField = ((BoundReference<?>) other).field();
// equivalence only depends on the field ID, type, and optional. name and accessor are ignored
return field.fieldId() == otherField.fieldId() &&
field.type().equals(otherField.type()) &&
field.isOptional() == otherField.isOptional();
}

return other.isEquivalentTo(this);
}

public int fieldId() {
return field.fieldId();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ public boolean test(T value) {
}
}

@Override
public boolean isEquivalentTo(Expression other) {
// only check bound set predicate; binding will convert sets of a single item to a literal predicate
if (op() == other.op()) {
BoundSetPredicate<?> pred = (BoundSetPredicate<?>) other;
return literalSet().equals(pred.literalSet());
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,11 @@ public interface BoundTerm<T> extends Bound<T>, Term {
default Comparator<T> comparator() {
return Comparators.forType(type().asPrimitiveType());
}

/**
* Returns whether this term is equivalent to another.
* @param other a term
* @return true if this term returns the same values as the other, false otherwise
*/
boolean isEquivalentTo(BoundTerm<?> other);
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ public Type type() {
return transform.getResultType(ref.type());
}

@Override
public boolean isEquivalentTo(BoundTerm<?> other) {
if (other instanceof BoundTransform) {
BoundTransform<?, ?> bound = (BoundTransform<?, ?>) other;
return ref.isEquivalentTo(bound.ref()) && transform.equals(bound.transform());
} else if (transform.isIdentity() && other instanceof BoundReference) {
return ref.isEquivalentTo(other);
}

return false;
}

@Override
public String toString() {
return transform + "(" + ref + ")";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ public boolean test(T value) {
}
}

@Override
public boolean isEquivalentTo(Expression other) {
if (op() == other.op()) {
return term().isEquivalentTo(((BoundUnaryPredicate<?>) other).term());
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
17 changes: 17 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Expression.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,21 @@ public Operation flipLR() {
default Expression negate() {
throw new UnsupportedOperationException(String.format("%s cannot be negated", this));
}

/**
* Returns whether this expression will accept the same values as another.
* <p>
* If this returns true, the expressions are guaranteed to return the same evaluation for the same input. However, if
* this returns false the expressions may return the same evaluation for the same input. That is, expressions may
* be equivalent even if this returns false.
* <p>
* For best results, rewrite not and bind expressions before calling this method.
*
* @param other another expression
* @return true if the expressions are equivalent
*/
default boolean isEquivalentTo(Expression other) {
// only bound predicates can be equivalent
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
Expand Down Expand Up @@ -66,6 +67,41 @@ public static String toSanitizedString(Expression expr) {
return ExpressionVisitors.visit(expr, StringSanitizer.INSTANCE);
}

/**
* Returns whether two unbound expressions will accept the same inputs.
* <p>
* If this returns true, the expressions are guaranteed to return the same evaluation for the same input. However, if
* this returns false the expressions may return the same evaluation for the same input. That is, expressions may
* be equivalent even if this returns false.
*
* @param left an unbound expression
* @param right an unbound expression
* @param struct a struct type for binding
* @param caseSensitive whether to bind expressions using case-sensitive matching
* @return true if the expressions are equivalent
*/
public static boolean equivalent(Expression left, Expression right, Types.StructType struct, boolean caseSensitive) {
return Binder.bind(struct, Expressions.rewriteNot(left), caseSensitive)
.isEquivalentTo(Binder.bind(struct, Expressions.rewriteNot(right), caseSensitive));
}

/**
* Returns whether an expression selects whole partitions for a partition spec.
* <p>
* For example, ts &lt; '2021-03-09T10:00:00.000' selects whole partitions in an hourly spec, [hours(ts)], but does
* not select whole partitions in a daily spec, [days(ts)].
*
* @param expr an unbound expression
* @param spec a partition spec
* @return true if the expression will select whole partitions in the given spec
*/
public static boolean selectsPartitions(Expression expr, PartitionSpec spec, boolean caseSensitive) {
return equivalent(
Projections.inclusive(spec, caseSensitive).project(expr),
Projections.strict(spec, caseSensitive).project(expr),
spec.partitionType(), caseSensitive);
}

private static class ExpressionSanitizer extends ExpressionVisitors.ExpressionVisitor<Expression> {
private static final ExpressionSanitizer INSTANCE = new ExpressionSanitizer();

Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/False.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public Expression negate() {
return True.INSTANCE;
}

@Override
public boolean isEquivalentTo(Expression other) {
return other.op() == Operation.FALSE;
}

@Override
public String toString() {
return "false";
Expand Down
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Or.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ public Operation op() {
return Expression.Operation.OR;
}

@Override
public boolean isEquivalentTo(Expression expr) {
if (expr.op() == Operation.OR) {
Or other = (Or) expr;
return (left.isEquivalentTo(other.left()) && right.isEquivalentTo(other.right())) ||
(left.isEquivalentTo(other.right()) && right.isEquivalentTo(other.left()));
}

return false;
}

@Override
public Expression negate() {
// not(or(a, b)) => and(not(a), not(b))
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/True.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public Expression negate() {
return False.INSTANCE;
}

@Override
public boolean isEquivalentTo(Expression other) {
return other.op() == Operation.TRUE;
}

@Override
public String toString() {
return "true";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ public String toString() {
@SuppressWarnings("unchecked")
static <T> Set<T> setOf(Iterable<Literal<T>> literals) {
Literal<T> lit = Iterables.get(literals, 0);
if (lit instanceof Literals.StringLiteral && lit.value() instanceof CharSequence) {
if (lit instanceof Literals.StringLiteral) {
Iterable<T> values = Iterables.transform(literals, Literal::value);
Iterable<CharSequence> charSeqs = Iterables.transform(values, val -> (CharSequence) val);
return (Set<T>) CharSequenceSet.of(charSeqs);
Expand Down
Loading

0 comments on commit 0955a6d

Please sign in to comment.








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/namrathamyske/iceberg/commit/0955a6d085fb02743ff937615dcfa2c7a6f3d4df

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy