Content-Length: 754974 | pFad | http://github.com/apache/iceberg/pull/4947/files/c1f29d2f03c903884737cb4254d2bd28bc72fc78

83 API: Add expression equivalence testing by rdblue · Pull Request #4947 · apache/iceberg · GitHub
Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Add expression equivalence testing #4947

Merged
merged 6 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/And.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ public Operation op() {
return Expression.Operation.AND;
}

@Override
public boolean isEquivalentTo(Expression expr) {
if (expr.op() == Operation.AND) {
And other = (And) expr;
return (left.isEquivalentTo(other.left()) && right.isEquivalentTo(other.right())) ||
(left.isEquivalentTo(other.right()) && right.isEquivalentTo(other.left()));
}

return false;
}

@Override
public Expression negate() {
// not(and(a, b)) => or(not(a), not(b))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,19 @@
package org.apache.iceberg.expressions;

import java.util.Comparator;
import java.util.Set;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;

public class BoundLiteralPredicate<T> extends BoundPredicate<T> {
private static final Set<Type.TypeID> INTEGRAL_TYPES = Sets.newHashSet(
Type.TypeID.INTEGER, Type.TypeID.LONG, Type.TypeID.DATE, Type.TypeID.TIME, Type.TypeID.TIMESTAMP);

private static long toLong(Literal<?> lit) {
return ((Number) lit.value()).longValue();
}

private final Literal<T> literal;

BoundLiteralPredicate(Operation op, BoundTerm<T> term, Literal<T> lit) {
Expand Down Expand Up @@ -76,6 +86,53 @@ public boolean test(T value) {
}
}

@Override
@SuppressWarnings("unchecked")
public boolean isEquivalentTo(Expression expr) {
if (op() == expr.op()) {
BoundLiteralPredicate<?> other = (BoundLiteralPredicate<?>) expr;
if (term().isEquivalentTo(other.term())) {
// because the term is equivalent, the literal must have the same type, T
Literal<T> otherLiteral = (Literal<T>) other.literal();
Comparator<T> cmp = literal.comparator();
return cmp.compare(literal.value(), otherLiteral.value()) == 0;
}

} else if (expr instanceof BoundLiteralPredicate) {
BoundLiteralPredicate<?> other = (BoundLiteralPredicate<?>) expr;
if (INTEGRAL_TYPES.contains(term().type().typeId()) && term().isEquivalentTo(other.term())) {
switch (op()) {
case LT:
if (other.op() == Operation.LT_EQ) {
// < 6 is equivalent to <= 5
return toLong(literal()) == toLong(other.literal()) + 1L;
}
break;
case LT_EQ:
if (other.op() == Operation.LT) {
// <= 5 is equivalent to < 6
return toLong(literal()) == toLong(other.literal()) - 1L;
}
break;
case GT:
if (other.op() == Operation.GT_EQ) {
// > 5 is equivalent to >= 6
return toLong(literal()) == toLong(other.literal()) - 1L;
}
break;
case GT_EQ:
if (other.op() == Operation.GT) {
// >= 5 is equivalent to > 4
return toLong(literal()) == toLong(other.literal()) + 1L;
}
break;
}
}
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,19 @@ public Type type() {
return field.type();
}

@Override
public boolean isEquivalentTo(BoundTerm<?> other) {
if (other instanceof BoundReference) {
Types.NestedField otherField = ((BoundReference<?>) other).field();
// equivalence only depends on the field ID, type, and optional. name and accessor are ignored
return field.fieldId() == otherField.fieldId() &&
field.type().equals(otherField.type()) &&
field.isOptional() == otherField.isOptional();
}

return other.isEquivalentTo(this);
}

public int fieldId() {
return field.fieldId();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ public boolean test(T value) {
}
}

@Override
public boolean isEquivalentTo(Expression other) {
// only check bound set predicate; binding will convert sets of a single item to a literal predicate
if (op() == other.op()) {
BoundSetPredicate<?> pred = (BoundSetPredicate<?>) other;
return literalSet().equals(pred.literalSet());
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,11 @@ public interface BoundTerm<T> extends Bound<T>, Term {
default Comparator<T> comparator() {
return Comparators.forType(type().asPrimitiveType());
}

/**
* Returns whether this term is equivalent to another.
* @param other a term
* @return true if this term returns the same values as the other, false otherwise
*/
boolean isEquivalentTo(BoundTerm<?> other);
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ public Type type() {
return transform.getResultType(ref.type());
}

@Override
public boolean isEquivalentTo(BoundTerm<?> other) {
if (other instanceof BoundTransform) {
BoundTransform<?, ?> bound = (BoundTransform<?, ?>) other;
return ref.isEquivalentTo(bound.ref()) && transform.equals(bound.transform());
} else if (transform.isIdentity() && other instanceof BoundReference) {
return ref.isEquivalentTo(other);
}

return false;
}

@Override
public String toString() {
return transform + "(" + ref + ")";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ public boolean test(T value) {
}
}

@Override
public boolean isEquivalentTo(Expression other) {
if (op() == other.op()) {
return term().isEquivalentTo(((BoundUnaryPredicate<?>) other).term());
}

return false;
}

@Override
public String toString() {
switch (op()) {
Expand Down
17 changes: 17 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Expression.java
Original file line number Diff line number Diff line change
Expand Up @@ -124,4 +124,21 @@ public Operation flipLR() {
default Expression negate() {
throw new UnsupportedOperationException(String.format("%s cannot be negated", this));
}

/**
* Returns whether this expression will accept the same values as another.
* <p>
* If this returns true, the expressions are guaranteed to return the same evaluation for the same input. However, if
* this returns false the expressions may return the same evaluation for the same input. That is, expressions may
* be equivalent even if this returns false.
* <p>
* For best results, rewrite not and bind expressions before calling this method.
*
* @param other another expression
* @return true if the expressions are equivalent
*/
default boolean isEquivalentTo(Expression other) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In which cases we use is prefix for boolean methods and in which no?
We are not very consistent in the code right now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I typically try to make the method name form a natural sentence in English, like if (someFilter.isEquivalentTo(other)) reads like "if some filter is equivalent to other". In some cases it makes more sense to use other words, like has.

I wasn't sure what to do with ExpressionUtil.equivalent since it compares two things, but areEquivalent(f1, f2, schema) doesn't feel right. I just left that as ExpressionUtil.equivalent. Suggestions are welcome!

rdblue marked this conversation as resolved.
Show resolved Hide resolved
// only bound predicates can be equivalent
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;
Expand Down Expand Up @@ -66,6 +67,40 @@ public static String toSanitizedString(Expression expr) {
return ExpressionVisitors.visit(expr, StringSanitizer.INSTANCE);
}

/**
* Returns whether two unbound expressions will accept the same inputs.
* <p>
* If this returns true, the expressions are guaranteed to return the same evaluation for the same input. However, if
* this returns false the expressions may return the same evaluation for the same input. That is, expressions may
* be equivalent even if this returns false.
*
* @param left an unbound expression
* @param right an unbound expression
* @param struct a struct type for binding
* @return true if the expressions are equivalent
*/
public static boolean equivalent(Expression left, Expression right, Types.StructType struct) {
rdblue marked this conversation as resolved.
Show resolved Hide resolved
return Binder.bind(struct, Expressions.rewriteNot(left))
.isEquivalentTo(Binder.bind(struct, Expressions.rewriteNot(right)));
}

/**
* Returns whether an expression selects whole partitions for a partition spec.
* <p>
* For example, ts &lt; '2021-03-09T10:00:00.000' selects whole partitions in an hourly spec, [hours(ts)], but does
* not select whole partitions in a daily spec, [days(ts)].
*
* @param expr an unbound expression
* @param spec a partition spec
* @return true if the expression will select whole partitions in the given spec
*/
public static boolean selectsPartitions(Expression expr, PartitionSpec spec) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct this should allow us to optimize metadata-only deletes by handling obvious cases without the need to plan files with potential matches and invoking evaluators?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, I'll follow up with that once this is merged.

return equivalent(
Projections.inclusive(spec).project(expr),
rdblue marked this conversation as resolved.
Show resolved Hide resolved
Projections.strict(spec).project(expr),
spec.partitionType());
}

private static class ExpressionSanitizer extends ExpressionVisitors.ExpressionVisitor<Expression> {
private static final ExpressionSanitizer INSTANCE = new ExpressionSanitizer();

Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/False.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public Expression negate() {
return True.INSTANCE;
}

@Override
public boolean isEquivalentTo(Expression other) {
return other.op() == Operation.FALSE;
}

@Override
public String toString() {
return "false";
Expand Down
11 changes: 11 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/Or.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ public Operation op() {
return Expression.Operation.OR;
}

@Override
public boolean isEquivalentTo(Expression expr) {
if (expr.op() == Operation.OR) {
Or other = (Or) expr;
return (left.isEquivalentTo(other.left()) && right.isEquivalentTo(other.right())) ||
(left.isEquivalentTo(other.right()) && right.isEquivalentTo(other.left()));
}

return false;
}

@Override
public Expression negate() {
// not(or(a, b)) => and(not(a), not(b))
Expand Down
5 changes: 5 additions & 0 deletions api/src/main/java/org/apache/iceberg/expressions/True.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public Expression negate() {
return False.INSTANCE;
}

@Override
public boolean isEquivalentTo(Expression other) {
return other.op() == Operation.TRUE;
}

@Override
public String toString() {
return "true";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ public String toString() {
@SuppressWarnings("unchecked")
static <T> Set<T> setOf(Iterable<Literal<T>> literals) {
Literal<T> lit = Iterables.get(literals, 0);
if (lit instanceof Literals.StringLiteral && lit.value() instanceof CharSequence) {
if (lit instanceof Literals.StringLiteral) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is always true because StringLiteral wraps a CharSequence.

Iterable<T> values = Iterables.transform(literals, Literal::value);
Iterable<CharSequence> charSeqs = Iterables.transform(values, val -> (CharSequence) val);
return (Set<T>) CharSequenceSet.of(charSeqs);
Expand Down
Loading








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/apache/iceberg/pull/4947/files/c1f29d2f03c903884737cb4254d2bd28bc72fc78

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy