Content-Length: 421969 | pFad | http://github.com/apache/iceberg/pull/4947/commits/0e93e179336e8f55793566281a182d3e68a6c1f4

API: Add expression equivalence testing by rdblue · Pull Request #4947 · apache/iceberg · GitHub
Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Add expression equivalence testing #4947

Merged
merged 6 commits into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add case sensitive arguments.
  • Loading branch information
rdblue committed Jun 3, 2022
commit 0e93e179336e8f55793566281a182d3e68a6c1f4
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,12 @@ public static String toSanitizedString(Expression expr) {
* @param left an unbound expression
* @param right an unbound expression
* @param struct a struct type for binding
* @param caseSensitive whether to bind expressions using case-sensitive matching
* @return true if the expressions are equivalent
*/
public static boolean equivalent(Expression left, Expression right, Types.StructType struct) {
return Binder.bind(struct, Expressions.rewriteNot(left))
.isEquivalentTo(Binder.bind(struct, Expressions.rewriteNot(right)));
public static boolean equivalent(Expression left, Expression right, Types.StructType struct, boolean caseSensitive) {
return Binder.bind(struct, Expressions.rewriteNot(left), caseSensitive)
.isEquivalentTo(Binder.bind(struct, Expressions.rewriteNot(right), caseSensitive));
}

/**
Expand All @@ -94,11 +95,11 @@ public static boolean equivalent(Expression left, Expression right, Types.Struct
* @param spec a partition spec
* @param caseSensitive whether to bind expressions using case-sensitive matching
* @return true if the expression will select whole partitions in the given spec
*/
public static boolean selectsPartitions(Expression expr, PartitionSpec spec) {
public static boolean selectsPartitions(Expression expr, PartitionSpec spec, boolean caseSensitive) {
return equivalent(
Projections.inclusive(spec).project(expr),
rdblue marked this conversation as resolved.
Show resolved Hide resolved
Projections.strict(spec).project(expr),
spec.partitionType());
spec.partitionType(), caseSensitive);
}

private static class ExpressionSanitizer extends ExpressionVisitors.ExpressionVisitor<Expression> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,12 @@ public void testIdenticalExpressionIsEquivalent() {
};

for (Expression expr : exprs) {
Assert.assertTrue("Should accept identical expression: " + expr, ExpressionUtil.equivalent(expr, expr, STRUCT));
Assert.assertTrue("Should accept identical expression: " + expr,
ExpressionUtil.equivalent(expr, expr, STRUCT, true));

for (Expression other : exprs) {
if (expr != other) {
Assert.assertFalse(ExpressionUtil.equivalent(expr, other, STRUCT));
Assert.assertFalse(ExpressionUtil.equivalent(expr, other, STRUCT, true));
}
}
}
Expand All @@ -285,12 +286,12 @@ public void testIdenticalTermIsEquivalent() {
};

for (UnboundTerm<?> term : terms) {
BoundTerm<?> bound = term.bind(STRUCT, false);
BoundTerm<?> bound = term.bind(STRUCT, true);
Assert.assertTrue("Should accept identical expression: " + term, bound.isEquivalentTo(bound));

for (UnboundTerm<?> other : terms) {
if (term != other) {
Assert.assertFalse(bound.isEquivalentTo(other.bind(STRUCT, false)));
Assert.assertFalse(bound.isEquivalentTo(other.bind(STRUCT, true)));
}
}
}
Expand All @@ -299,30 +300,33 @@ public void testIdenticalTermIsEquivalent() {
@Test
public void testRefEquivalence() {
Assert.assertFalse("Should not find different refs equivalent",
Expressions.ref("val").bind(STRUCT, false).isEquivalentTo(Expressions.ref("val2").bind(STRUCT, false)));
Expressions.ref("val").bind(STRUCT, true).isEquivalentTo(Expressions.ref("val2").bind(STRUCT, true)));
}

@Test
public void testInEquivalence() {
Assert.assertTrue("Should ignore duplicate longs (in)",
ExpressionUtil.equivalent(Expressions.in("id", 1, 2, 1), Expressions.in("id", 2, 1, 2), STRUCT));
ExpressionUtil.equivalent(Expressions.in("id", 1, 2, 1), Expressions.in("id", 2, 1, 2), STRUCT, true));
Assert.assertTrue("Should ignore duplicate longs (notIn)",
ExpressionUtil.equivalent(Expressions.notIn("id", 1, 2, 1), Expressions.notIn("id", 2, 1, 2), STRUCT));
ExpressionUtil.equivalent(Expressions.notIn("id", 1, 2, 1), Expressions.notIn("id", 2, 1, 2), STRUCT, true));

Assert.assertTrue("Should ignore duplicate strings (in)",
ExpressionUtil.equivalent(Expressions.in("data", "a", "b", "a"), Expressions.in("data", "b", "a"), STRUCT));
ExpressionUtil.equivalent(
Expressions.in("data", "a", "b", "a"),
Expressions.in("data", "b", "a"),
STRUCT, true));
Assert.assertTrue("Should ignore duplicate strings (notIn)",
ExpressionUtil.equivalent(Expressions.notIn("data", "b", "b"), Expressions.notIn("data", "b"), STRUCT));
ExpressionUtil.equivalent(Expressions.notIn("data", "b", "b"), Expressions.notIn("data", "b"), STRUCT, true));

Assert.assertTrue("Should detect equivalence with equal (in, string)",
ExpressionUtil.equivalent(Expressions.in("data", "a"), Expressions.equal("data", "a"), STRUCT));
ExpressionUtil.equivalent(Expressions.in("data", "a"), Expressions.equal("data", "a"), STRUCT, true));
Assert.assertTrue("Should detect equivalence with notEqual (notIn, long)",
ExpressionUtil.equivalent(Expressions.notIn("id", 1), Expressions.notEqual("id", 1), STRUCT));
ExpressionUtil.equivalent(Expressions.notIn("id", 1), Expressions.notEqual("id", 1), STRUCT, true));

Assert.assertFalse("Should detect different sets (in, long)",
ExpressionUtil.equivalent(Expressions.in("id", 1, 2, 3), Expressions.in("id", 1, 2), STRUCT));
ExpressionUtil.equivalent(Expressions.in("id", 1, 2, 3), Expressions.in("id", 1, 2), STRUCT, true));
Assert.assertFalse("Should detect different sets (notIn, string)",
ExpressionUtil.equivalent(Expressions.notIn("data", "a", "b"), Expressions.notIn("data", "a"), STRUCT));
ExpressionUtil.equivalent(Expressions.notIn("data", "a", "b"), Expressions.notIn("data", "a"), STRUCT, true));
}

@Test
Expand All @@ -331,23 +335,37 @@ public void testInequalityEquivalence() {

for (String col : cols) {
Assert.assertTrue("Should detect < to <= equivalence: " + col,
ExpressionUtil.equivalent(Expressions.lessThan(col, 34L), Expressions.lessThanOrEqual(col, 33L), STRUCT));
ExpressionUtil.equivalent(
Expressions.lessThan(col, 34L),
Expressions.lessThanOrEqual(col, 33L),
STRUCT, true));
Assert.assertTrue("Should detect <= to < equivalence: " + col,
ExpressionUtil.equivalent(Expressions.lessThanOrEqual(col, 34L), Expressions.lessThan(col, 35L), STRUCT));
ExpressionUtil.equivalent(
Expressions.lessThanOrEqual(col, 34L),
Expressions.lessThan(col, 35L),
STRUCT, true));
Assert.assertTrue("Should detect > to >= equivalence: " + col,
ExpressionUtil.equivalent(
Expressions.greaterThan(col, 34L),
Expressions.greaterThanOrEqual(col, 35L), STRUCT));
Expressions.greaterThanOrEqual(col, 35L),
STRUCT, true));
Assert.assertTrue("Should detect >= to > equivalence: " + col,
ExpressionUtil.equivalent(
Expressions.greaterThanOrEqual(col, 34L),
Expressions.greaterThan(col, 33L), STRUCT));
Expressions.greaterThan(col, 33L),
STRUCT, true));
}

Assert.assertFalse("Should not detect equivalence for different columns",
ExpressionUtil.equivalent(Expressions.lessThan("val", 34L), Expressions.lessThanOrEqual("val2", 33L), STRUCT));
ExpressionUtil.equivalent(
Expressions.lessThan("val", 34L),
Expressions.lessThanOrEqual("val2", 33L),
STRUCT, true));
Assert.assertFalse("Should not detect equivalence for different types",
ExpressionUtil.equivalent(Expressions.lessThan("val", 34L), Expressions.lessThanOrEqual("id", 33L), STRUCT));
ExpressionUtil.equivalent(
Expressions.lessThan("val", 34L),
Expressions.lessThanOrEqual("id", 33L),
STRUCT, true));
}

@Test
Expand All @@ -356,7 +374,7 @@ public void testAndEquivalence() {
ExpressionUtil.equivalent(
Expressions.and(Expressions.lessThan("id", 34), Expressions.greaterThanOrEqual("id", 20)),
Expressions.and(Expressions.greaterThan("id", 19L), Expressions.lessThanOrEqual("id", 33L)),
STRUCT));
STRUCT, true));
}

@Test
Expand All @@ -365,7 +383,7 @@ public void testOrEquivalence() {
ExpressionUtil.equivalent(
Expressions.or(Expressions.lessThan("id", 20), Expressions.greaterThanOrEqual("id", 34)),
Expressions.or(Expressions.greaterThan("id", 33L), Expressions.lessThanOrEqual("id", 19L)),
STRUCT));
STRUCT, true));
}

@Test
Expand All @@ -374,25 +392,25 @@ public void testNotEquivalence() {
ExpressionUtil.equivalent(
Expressions.not(Expressions.or(Expressions.in("data", "a"), Expressions.greaterThanOrEqual("id", 34))),
Expressions.and(Expressions.lessThan("id", 34L), Expressions.notEqual("data", "a")),
STRUCT));
STRUCT, true));
}

@Test
public void testSelectsPartitions() {
Assert.assertTrue("Should select partitions, on boundary",
ExpressionUtil.selectsPartitions(
Expressions.lessThan("ts", "2021-03-09T10:00:00.000000"),
PartitionSpec.builderFor(SCHEMA).hour("ts").build()));
PartitionSpec.builderFor(SCHEMA).hour("ts").build(), true));

Assert.assertFalse("Should not select partitions, 1 ms off boundary",
ExpressionUtil.selectsPartitions(
Expressions.lessThanOrEqual("ts", "2021-03-09T10:00:00.000000"),
PartitionSpec.builderFor(SCHEMA).hour("ts").build()));
PartitionSpec.builderFor(SCHEMA).hour("ts").build(), true));

Assert.assertFalse("Should not select partitions, on hour not day boundary",
ExpressionUtil.selectsPartitions(
Expressions.lessThan("ts", "2021-03-09T10:00:00.000000"),
PartitionSpec.builderFor(SCHEMA).day("ts").build()));
PartitionSpec.builderFor(SCHEMA).day("ts").build(), true));
}

private void assertEquals(Expression expected, Expression actual) {
Expand Down








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/apache/iceberg/pull/4947/commits/0e93e179336e8f55793566281a182d3e68a6c1f4

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy