diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 389036ce237b..0eb7c2fe1eb6 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; /** Mutable {@link StructLike} implementation of {@link DeletionVector}. */ @@ -49,6 +50,14 @@ private DeletionVectorStruct(DeletionVectorStruct toCopy) { this.cardinality = toCopy.cardinality; } + private DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { + super(BASE_TYPE, BASE_TYPE); + this.location = location; + this.offset = offset; + this.sizeInBytes = sizeInBytes; + this.cardinality = cardinality; + } + @Override public String location() { return location; @@ -115,6 +124,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -124,4 +137,41 @@ public String toString() { .add("cardinality", cardinality) .toString(); } + + static class Builder { + private String location = null; + private long offset = -1L; + private long sizeInBytes = -1L; + private long cardinality = -1L; + + Builder location(String dvLocation) { + this.location = dvLocation; + return this; + } + + Builder offset(long dvOffset) { + this.offset = dvOffset; + return this; + } + + Builder sizeInBytes(long dvSizeInBytes) { + this.sizeInBytes = dvSizeInBytes; + return this; + } + + Builder cardinality(long dvCardinality) { + this.cardinality = dvCardinality; + return this; + } + + DeletionVectorStruct build() { + Preconditions.checkArgument(location != null, "Invalid location: null"); + Preconditions.checkArgument(offset >= 0, "Invalid offset: %s (must be >= 0)", offset); + Preconditions.checkArgument( + sizeInBytes >= 0, "Invalid size in bytes: %s (must be >= 0)", sizeInBytes); + Preconditions.checkArgument( + cardinality >= 0, "Invalid cardinality: %s (must be >= 0)", cardinality); + return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 8f51df749e33..922047bffedd 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -23,6 +23,7 @@ import java.util.Arrays; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; @@ -73,6 +74,32 @@ private ManifestInfoStruct(ManifestInfoStruct toCopy) { this.dvCardinality = toCopy.dvCardinality; } + private ManifestInfoStruct( + int addedFilesCount, + int existingFilesCount, + int deletedFilesCount, + int replacedFilesCount, + long addedRowsCount, + long existingRowsCount, + long deletedRowsCount, + long replacedRowsCount, + long minSequenceNumber, + byte[] dv, + Long dvCardinality) { + super(BASE_TYPE, BASE_TYPE); + this.addedFilesCount = addedFilesCount; + this.existingFilesCount = existingFilesCount; + this.deletedFilesCount = deletedFilesCount; + this.replacedFilesCount = replacedFilesCount; + this.addedRowsCount = addedRowsCount; + this.existingRowsCount = existingRowsCount; + this.deletedRowsCount = deletedRowsCount; + this.replacedRowsCount = replacedRowsCount; + this.minSequenceNumber = minSequenceNumber; + this.dv = dv; + this.dvCardinality = dvCardinality; + } + @Override public int addedFilesCount() { return addedFilesCount; @@ -208,6 +235,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -224,4 +255,130 @@ public String toString() { .add("dv_cardinality", dvCardinality == null ? "null" : dvCardinality) .toString(); } + + static class Builder { + private int addedFilesCount = -1; + private int existingFilesCount = -1; + private int deletedFilesCount = -1; + private int replacedFilesCount = -1; + private long addedRowsCount = -1L; + private long existingRowsCount = -1L; + private long deletedRowsCount = -1L; + private long replacedRowsCount = -1L; + private long minSequenceNumber = -1L; + private byte[] dv = null; + private Long dvCardinality = null; + + Builder addedFilesCount(int count) { + this.addedFilesCount = count; + return this; + } + + Builder existingFilesCount(int count) { + this.existingFilesCount = count; + return this; + } + + Builder deletedFilesCount(int count) { + this.deletedFilesCount = count; + return this; + } + + Builder replacedFilesCount(int count) { + this.replacedFilesCount = count; + return this; + } + + Builder addedRowsCount(long count) { + this.addedRowsCount = count; + return this; + } + + Builder existingRowsCount(long count) { + this.existingRowsCount = count; + return this; + } + + Builder deletedRowsCount(long count) { + this.deletedRowsCount = count; + return this; + } + + Builder replacedRowsCount(long count) { + this.replacedRowsCount = count; + return this; + } + + Builder minSequenceNumber(long sequenceNumber) { + this.minSequenceNumber = sequenceNumber; + return this; + } + + Builder dv(ByteBuffer buffer) { + this.dv = buffer != null ? ByteBuffers.toByteArray(buffer) : null; + return this; + } + + Builder dv(byte[] buffer) { + this.dv = buffer; + return this; + } + + Builder dvCardinality(Long cardinality) { + this.dvCardinality = cardinality; + return this; + } + + ManifestInfoStruct build() { + Preconditions.checkArgument( + addedFilesCount >= 0, "Invalid added files count: %s (must be >= 0)", addedFilesCount); + Preconditions.checkArgument( + existingFilesCount >= 0, + "Invalid existing files count: %s (must be >= 0)", + existingFilesCount); + Preconditions.checkArgument( + deletedFilesCount >= 0, + "Invalid deleted files count: %s (must be >= 0)", + deletedFilesCount); + Preconditions.checkArgument( + replacedFilesCount >= 0, + "Invalid replaced files count: %s (must be >= 0)", + replacedFilesCount); + Preconditions.checkArgument( + addedRowsCount >= 0, "Invalid added rows count: %s (must be >= 0)", addedRowsCount); + Preconditions.checkArgument( + existingRowsCount >= 0, + "Invalid existing rows count: %s (must be >= 0)", + existingRowsCount); + Preconditions.checkArgument( + deletedRowsCount >= 0, "Invalid deleted rows count: %s (must be >= 0)", deletedRowsCount); + Preconditions.checkArgument( + replacedRowsCount >= 0, + "Invalid replaced rows count: %s (must be >= 0)", + replacedRowsCount); + Preconditions.checkArgument( + minSequenceNumber >= 0, + "Invalid min sequence number: %s (must be >= 0)", + minSequenceNumber); + Preconditions.checkArgument( + (dv == null) == (dvCardinality == null), + "Invalid DV and cardinality: must both be null or non-null"); + Preconditions.checkArgument( + dvCardinality == null || dvCardinality > 0, + "Invalid DV cardinality: %s (must be positive)", + dvCardinality); + return new ManifestInfoStruct( + addedFilesCount, + existingFilesCount, + deletedFilesCount, + replacedFilesCount, + addedRowsCount, + existingRowsCount, + deletedRowsCount, + replacedRowsCount, + minSequenceNumber, + dv, + dvCardinality); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index a8624aad15c1..65513c8d4a7c 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -59,10 +59,6 @@ class TrackingStruct extends SupportsIndexProjection implements Tracking, Serial super(BASE_TYPE, type); } - TrackingStruct() { - super(BASE_TYPE.fields().size()); - } - private TrackingStruct(TrackingStruct toCopy) { super(toCopy); this.status = toCopy.status; @@ -83,6 +79,26 @@ private TrackingStruct(TrackingStruct toCopy) { this.manifestPos = toCopy.manifestPos; } + private TrackingStruct( + EntryStatus status, + Long snapshotId, + Long dataSequenceNumber, + Long fileSequenceNumber, + Long dvSnapshotId, + Long firstRowId, + byte[] deletedPositions, + byte[] replacedPositions) { + super(BASE_TYPE, BASE_TYPE); + this.status = status; + this.snapshotId = snapshotId; + this.dataSequenceNumber = dataSequenceNumber; + this.fileSequenceNumber = fileSequenceNumber; + this.dvSnapshotId = dvSnapshotId; + this.firstRowId = firstRowId; + this.deletedPositions = deletedPositions; + this.replacedPositions = replacedPositions; + } + void inheritFrom(Tracking manifestTracking) { if (manifestTracking != null) { if (snapshotId == null) { @@ -233,6 +249,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -246,4 +266,78 @@ public String toString() { .add("replaced_positions", replacedPositions == null ? "null" : "(binary)") .toString(); } + + static class Builder { + private EntryStatus status = null; + private Long snapshotId = null; + private Long dataSequenceNumber = null; + private Long fileSequenceNumber = null; + private Long dvSnapshotId = null; + private Long firstRowId = null; + private byte[] deletedPositions = null; + private byte[] replacedPositions = null; + + Builder status(EntryStatus entryStatus) { + this.status = entryStatus; + return this; + } + + Builder snapshotId(Long id) { + this.snapshotId = id; + return this; + } + + Builder dataSequenceNumber(Long sequenceNumber) { + this.dataSequenceNumber = sequenceNumber; + return this; + } + + Builder fileSequenceNumber(Long sequenceNumber) { + this.fileSequenceNumber = sequenceNumber; + return this; + } + + Builder dvSnapshotId(Long id) { + this.dvSnapshotId = id; + return this; + } + + Builder firstRowId(Long rowId) { + this.firstRowId = rowId; + return this; + } + + Builder deletedPositions(ByteBuffer positions) { + this.deletedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; + return this; + } + + Builder deletedPositions(byte[] positions) { + this.deletedPositions = positions; + return this; + } + + Builder replacedPositions(ByteBuffer positions) { + this.replacedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; + return this; + } + + Builder replacedPositions(byte[] positions) { + this.replacedPositions = positions; + return this; + } + + TrackingStruct build() { + Preconditions.checkArgument(status != null, "Invalid status: null"); + return new TrackingStruct( + status, + snapshotId, + dataSequenceNumber, + fileSequenceNumber, + dvSnapshotId, + firstRowId, + deletedPositions, + replacedPositions); + } + } } diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 5ab6b1f3586c..325f9afd9ca9 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import org.apache.iceberg.types.Types; @@ -28,12 +29,13 @@ class TestDeletionVectorStruct { @Test void testFieldAccess() { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); assertThat(dv.location()).isEqualTo("s3://bucket/data/dv.puffin"); assertThat(dv.offset()).isEqualTo(256L); @@ -43,12 +45,13 @@ void testFieldAccess() { @Test void testCopy() { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct copy = dv.copy(); @@ -86,11 +89,13 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct deserialized = TestHelpers.roundTripSerialize(dv); @@ -100,13 +105,53 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.cardinality()).isEqualTo(42L); } + @Test + void testBuilderValidation() { + assertThatThrownBy( + () -> DeletionVectorStruct.builder().offset(0).sizeInBytes(1).cardinality(1).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .sizeInBytes(1) + .cardinality(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid offset: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(0) + .cardinality(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid size in bytes: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(0) + .sizeInBytes(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid cardinality: -1 (must be >= 0)"); + } + @Test void testKryoSerializationRoundTrip() throws IOException { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(dv); diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 23917de9cd40..3a694f1a38f2 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; @@ -58,19 +59,20 @@ void testFieldAccess() { @Test void testCopy() { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(new byte[] {0xF}) + .dvCardinality(1L) + .build(); ManifestInfoStruct copy = info.copy(); @@ -91,17 +93,18 @@ void testCopy() { @Test void testNullableFields() { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - info.set(0, 0); - info.set(1, 0); - info.set(2, 0); - info.set(3, 0); - info.set(4, 0L); - info.set(5, 0L); - info.set(6, 0L); - info.set(7, 0L); - info.set(8, 0L); + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build(); assertThat(info.dv()).isNull(); assertThat(info.dvCardinality()).isNull(); @@ -129,18 +132,20 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(new byte[] {0xF}) + .dvCardinality(1L) + .build(); ManifestInfoStruct deserialized = TestHelpers.roundTripSerialize(info); @@ -157,20 +162,215 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.dvCardinality()).isEqualTo(1L); } + @Test + void testBuilderValidation() { + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid added files count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid existing files count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted files count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced files count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid added rows count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid existing rows count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted rows count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced rows count: -1 (must be >= 0)"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid min sequence number: -1 (must be >= 0)"); + } + + @Test + void testBuilderDvPairingValidation() { + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dv(new byte[] {0xF}) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid DV and cardinality: must both be null or non-null"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dvCardinality(1L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid DV and cardinality: must both be null or non-null"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dv(new byte[] {0xF}) + .dvCardinality(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid DV cardinality: 0 (must be positive)"); + } + @Test void testKryoSerializationRoundTrip() throws IOException { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(new byte[] {0xF}) + .dvCardinality(1L) + .build(); ManifestInfoStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(info); diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 05013ae54e79..62324e5607ef 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -33,27 +33,27 @@ class TestTrackedFileStruct { @Test void testFieldAccess() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(); - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - - dv.set(0, "s3://bucket/dv.puffin"); - dv.set(1, 100L); - dv.set(2, 50L); - dv.set(3, 5L); - - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); + TrackingStruct tracking = + TrackingStruct.builder().status(EntryStatus.ADDED).snapshotId(42L).build(); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(100L) + .sizeInBytes(50L) + .cardinality(5L) + .build(); + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .build(); file.set(0, tracking); file.set(1, FileContent.EQUALITY_DELETES.id()); @@ -90,8 +90,7 @@ void testFieldAccess() { void testReaderSideFields() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.setManifestLocation("s3://bucket/metadata/manifest.avro"); tracking.set(8, 7L); @@ -279,18 +278,22 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .build(); tracking.setManifestLocation("s3://bucket/manifest.avro"); tracking.set(8, 3L); - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/dv.puffin"); - dv.set(1, 100L); - dv.set(2, 50L); - dv.set(3, 5L); + DeletionVectorStruct dv = + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(100L) + .sizeInBytes(50L) + .cardinality(5L) + .build(); TrackedFileStruct file = new TrackedFileStruct( diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 5af41d0dcf02..98a7eff2af45 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -53,12 +53,13 @@ void testFieldAccess() { @Test void testCopy() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(new byte[] {1, 2}) + .build(); TrackingStruct copy = tracking.copy(); @@ -98,8 +99,7 @@ void testIsLive() { @Test void testInheritSnapshotId() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // snapshotId is null, should inherit from manifest @@ -108,8 +108,7 @@ void testInheritSnapshotId() { @Test void testInheritSequenceNumberForAddedEntries() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are null and status is ADDED, should inherit @@ -119,10 +118,12 @@ void testInheritSequenceNumberForAddedEntries() { @Test void testDoNotInheritSequenceNumberForExistingEntries() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.EXISTING.id()); - tracking.set(2, 5L); - tracking.set(3, 6L); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.EXISTING) + .dataSequenceNumber(5L) + .fileSequenceNumber(6L) + .build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are not inherited for EXISTING entries @@ -132,11 +133,13 @@ void testDoNotInheritSequenceNumberForExistingEntries() { @Test void testExplicitValuesOverrideInheritance() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 200L); - tracking.set(2, 75L); - tracking.set(3, 76L); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(200L) + .dataSequenceNumber(75L) + .fileSequenceNumber(76L) + .build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // explicit values should take precedence @@ -163,8 +166,7 @@ void testInheritFromRejectsUnequalSequenceNumbers() { @Test void testNoDefaultingWithoutInheritance() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); // no inheritance, nulls stay null assertThat(tracking.snapshotId()).isNull(); @@ -173,12 +175,19 @@ void testNoDefaultingWithoutInheritance() { } private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, snapshotId); - tracking.set(2, sequenceNumber); - tracking.set(3, sequenceNumber); - return tracking; + return TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(snapshotId) + .dataSequenceNumber(sequenceNumber) + .fileSequenceNumber(sequenceNumber) + .build(); + } + + @Test + void testBuilderValidation() { + assertThatThrownBy(() -> TrackingStruct.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status: null"); } @Test @@ -202,11 +211,13 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(new byte[] {1, 2}) + .build(); TrackingStruct deserialized = TestHelpers.roundTripSerialize(tracking); @@ -218,11 +229,13 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + TrackingStruct.builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(new byte[] {1, 2}) + .build(); TrackingStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(tracking);