Content-Length: 546675 | pFad | http://github.com/zachdisc/iceberg/commit/982fad1b8e729e079c0d0769033fad696fec0f84

Flink: Prevent setting endTag/endSnapshotId for streaming source (#10… · zachdisc/iceberg@982fad1 · GitHub
Skip to content

Commit

Permalink
Flink: Prevent setting endTag/endSnapshotId for streaming source (apa…
Browse files Browse the repository at this point in the history
  • Loading branch information
pvary authored and zachdisc committed Dec 12, 2024
1 parent f499b0b commit 982fad1
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,8 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.util.PropertyUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class FlinkSource {
private static final Logger LOG = LoggerFactory.getLogger(FlinkSource.class);

private FlinkSource() {}

/**
Expand Down Expand Up @@ -263,8 +259,9 @@ public FlinkInputFormat buildFormat() {

contextBuilder.resolveConfig(table, readOptions, readableConfig);

return new FlinkInputFormat(
tableLoader, icebergSchema, io, encryption, contextBuilder.build());
ScanContext context = contextBuilder.build();
context.validate();
return new FlinkInputFormat(tableLoader, icebergSchema, io, encryption, context);
}

public DataStream<RowData> build() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ public IcebergSource<T> build() {
}

ScanContext context = contextBuilder.build();
context.validate();
if (readerFunction == null) {
if (table instanceof BaseMetadataTable) {
MetaDataReaderFunction rowDataReaderFunction =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,9 @@ private ScanContext(
this.maxAllowedPlanningFailures = maxAllowedPlanningFailures;
this.watermarkColumn = watermarkColumn;
this.watermarkColumnTimeUnit = watermarkColumnTimeUnit;

validate();
}

private void validate() {
void validate() {
if (isStreaming) {
if (startingStrategy == StreamingStartingStrategy.INCREMENTAL_FROM_SNAPSHOT_ID) {
Preconditions.checkArgument(
Expand All @@ -155,6 +153,13 @@ private void validate() {
Preconditions.checkArgument(
tag == null,
String.format("Cannot scan table using ref %s configured for streaming reader", tag));
Preconditions.checkArgument(
snapshotId == null, "Cannot set snapshot-id option for streaming reader");
Preconditions.checkArgument(
asOfTimestamp == null, "Cannot set as-of-timestamp option for streaming reader");
Preconditions.checkArgument(
endSnapshotId == null, "Cannot set end-snapshot-id option for streaming reader");
Preconditions.checkArgument(endTag == null, "Cannot set end-tag option for streaming reader");
}

Preconditions.checkArgument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@
*/
package org.apache.iceberg.flink.source;

import static org.apache.iceberg.flink.SimpleDataUtil.SCHEMA;
import static org.apache.iceberg.types.Types.NestedField.required;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.assertj.core.api.Assumptions.assumeThat;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.types.logical.RowType;
Expand Down Expand Up @@ -184,6 +187,23 @@ public void testReadPartitionColumn() throws Exception {
TestHelpers.assertRows(result, expected);
}

/**
 * Checks that building a non-streaming {@code FlinkSource} with both an end tag and an end
 * snapshot id fails with an {@link IllegalArgumentException} carrying the expected message.
 */
@TestTemplate
public void testValidation() {
  catalogExtension.catalog().createTable(TestFixtures.TABLE_IDENTIFIER, SCHEMA);

  assertThatThrownBy(
          () -> {
            // Both end markers are supplied at once on a non-streaming source.
            FlinkSource.forRowData()
                .env(StreamExecutionEnvironment.getExecutionEnvironment())
                .tableLoader(tableLoader())
                .streaming(false)
                .endTag("tag")
                .endSnapshotId(1L)
                .build();
          })
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage("END_SNAPSHOT_ID and END_TAG cannot both be set.");
}

private List<Row> runFormat(FlinkInputFormat inputFormat) throws IOException {
RowType rowType = FlinkSchemaUtil.convert(inputFormat.projectedSchema());
return TestHelpers.readRows(inputFormat, rowType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
*/
package org.apache.iceberg.flink.source;

import static org.apache.iceberg.flink.SimpleDataUtil.SCHEMA;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand All @@ -42,8 +45,25 @@
import org.apache.iceberg.flink.source.assigner.SimpleSplitAssignerFactory;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.junit.jupiter.api.TestTemplate;

public class TestIcebergSourceBounded extends TestFlinkScan {
/**
 * Checks that building a non-streaming {@code IcebergSource} with both an end tag and an end
 * snapshot id fails with an {@link IllegalArgumentException} carrying the expected message.
 */
@TestTemplate
public void testValidation() {
  catalogExtension.catalog().createTable(TestFixtures.TABLE_IDENTIFIER, SCHEMA);

  assertThatThrownBy(
          () -> {
            // Both end markers are supplied at once on a non-streaming source.
            IcebergSource.forRowData()
                .tableLoader(tableLoader())
                .assignerFactory(new SimpleSplitAssignerFactory())
                .streaming(false)
                .endTag("tag")
                .endSnapshotId(1L)
                .build();
          })
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage("END_SNAPSHOT_ID and END_TAG cannot both be set.");
}

@Override
protected List<Row> runWithProjection(String... projected) throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.iceberg.flink.source;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.time.Duration;
import java.util.Collection;
Expand Down Expand Up @@ -473,6 +474,20 @@ public void testReadingFromBranch() throws Exception {
}
}

/**
 * Checks that building a streaming {@code IcebergSource} with an end tag fails with an
 * {@link IllegalArgumentException} carrying the expected message.
 */
@Test
public void testValidation() {
  assertThatThrownBy(
          () -> {
            // An end tag is combined with streaming(true).
            IcebergSource.forRowData()
                .tableLoader(tableResource.tableLoader())
                .assignerFactory(new SimpleSplitAssignerFactory())
                .streaming(true)
                .endTag("tag")
                .build();
          })
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessage("Cannot set end-tag option for streaming reader");
}

private DataStream<Row> createStream(ScanContext scanContext) throws Exception {
// start the source and collect output
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.flink.source;

import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

/**
 * Unit tests for {@code ScanContext.validate()}.
 *
 * <p>Each case builds a {@code ScanContext} from a specific option combination and asserts that
 * calling {@code validate()} on it throws an {@link IllegalArgumentException} with an exact
 * message.
 */
class TestScanContext {

  /** Snapshot-id starting strategy: missing snapshot id, then snapshot id plus timestamp. */
  @Test
  void testIncrementalFromSnapshotId() {
    assertException(
        ScanContext.builder()
            .streaming(true)
            .startingStrategy(StreamingStartingStrategy.INCREMENTAL_FROM_SNAPSHOT_ID)
            .build(),
        "Invalid starting snapshot id for SPECIFIC_START_SNAPSHOT_ID strategy: null");

    assertException(
        ScanContext.builder()
            .streaming(true)
            .startingStrategy(StreamingStartingStrategy.INCREMENTAL_FROM_SNAPSHOT_ID)
            .startSnapshotId(1L)
            .startSnapshotTimestamp(1L)
            .build(),
        "Invalid starting snapshot timestamp for SPECIFIC_START_SNAPSHOT_ID strategy: not null");
  }

  /** Snapshot-timestamp starting strategy: missing timestamp, then timestamp plus snapshot id. */
  @Test
  void testIncrementalFromSnapshotTimestamp() {
    assertException(
        ScanContext.builder()
            .streaming(true)
            .startingStrategy(StreamingStartingStrategy.INCREMENTAL_FROM_SNAPSHOT_TIMESTAMP)
            .build(),
        "Invalid starting snapshot timestamp for SPECIFIC_START_SNAPSHOT_TIMESTAMP strategy: null");

    assertException(
        ScanContext.builder()
            .streaming(true)
            .startingStrategy(StreamingStartingStrategy.INCREMENTAL_FROM_SNAPSHOT_TIMESTAMP)
            .startSnapshotId(1L)
            .startSnapshotTimestamp(1L)
            .build(),
        "Invalid starting snapshot id for SPECIFIC_START_SNAPSHOT_ID strategy: not null");
  }

  /** Options that are each expected to be rejected when streaming is enabled. */
  @Test
  void testStreaming() {
    assertException(
        ScanContext.builder().streaming(true).useTag("tag").build(),
        "Cannot scan table using ref tag configured for streaming reader");

    assertException(
        ScanContext.builder().streaming(true).useSnapshotId(1L).build(),
        "Cannot set snapshot-id option for streaming reader");

    assertException(
        ScanContext.builder().streaming(true).asOfTimestamp(1L).build(),
        "Cannot set as-of-timestamp option for streaming reader");

    assertException(
        ScanContext.builder().streaming(true).endSnapshotId(1L).build(),
        "Cannot set end-snapshot-id option for streaming reader");

    assertException(
        ScanContext.builder().streaming(true).endTag("tag").build(),
        "Cannot set end-tag option for streaming reader");
  }

  /** A start tag together with a start snapshot id. */
  @Test
  void testStartConflict() {
    assertException(
        ScanContext.builder().startTag("tag").startSnapshotId(1L).build(),
        "START_SNAPSHOT_ID and START_TAG cannot both be set.");
  }

  /** An end tag together with an end snapshot id. */
  @Test
  void testEndConflict() {
    assertException(
        ScanContext.builder().endTag("tag").endSnapshotId(1L).build(),
        "END_SNAPSHOT_ID and END_TAG cannot both be set.");
  }

  /** A {@code maxAllowedPlanningFailures} value of -2. */
  @Test
  void testMaxAllowedPlanningFailures() {
    assertException(
        ScanContext.builder().maxAllowedPlanningFailures(-2).build(),
        "Cannot set maxAllowedPlanningFailures to a negative number other than -1.");
  }

  /**
   * Asserts that {@code validate()} on the given context throws an
   * {@link IllegalArgumentException} whose message equals {@code expectedMessage}.
   *
   * @param scanContext the already-built context to validate
   * @param expectedMessage the exact exception message expected
   */
  private static void assertException(ScanContext scanContext, String expectedMessage) {
    Assertions.assertThatThrownBy(scanContext::validate)
        .isInstanceOf(IllegalArgumentException.class)
        .hasMessage(expectedMessage);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.api.Timeout;

@Timeout(60)
public class TestStreamScanSql extends CatalogTestBase {
private static final String TABLE = "test_table";
private static final FileFormat FORMAT = FileFormat.PARQUET;
Expand Down

0 comments on commit 982fad1

Please sign in to comment.








ApplySandwichStrip

pFad - (p)hone/(F)rame/(a)nonymizer/(d)eclutterfier!      Saves Data!


--- a PPN by Garber Painting Akron. With Image Size Reduction included!

Fetched URL: http://github.com/zachdisc/iceberg/commit/982fad1b8e729e079c0d0769033fad696fec0f84

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy