From 35926a9eb715d3b54d03636e4601e274dccc66ac Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Thu, 23 Apr 2026 14:27:02 -0700 Subject: [PATCH 01/10] Add constructors for DeletionVectorStruct and TrackingStruct Use constructors with required fields instead of positional set() calls in tests for clarity. --- .../apache/iceberg/DeletionVectorStruct.java | 8 +++++ .../org/apache/iceberg/TrackingStruct.java | 6 ++++ .../iceberg/TestDeletionVectorStruct.java | 30 +++++-------------- .../apache/iceberg/TestTrackedFileStruct.java | 25 ++++------------ .../apache/iceberg/TestTrackingStruct.java | 20 ++++--------- 5 files changed, 33 insertions(+), 56 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 389036ce237b..65da357453ad 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -41,6 +41,14 @@ class DeletionVectorStruct extends SupportsIndexProjection implements DeletionVe super(BASE_TYPE, type); } + DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { + super(BASE_TYPE.fields().size()); + this.location = location; + this.offset = offset; + this.sizeInBytes = sizeInBytes; + this.cardinality = cardinality; + } + private DeletionVectorStruct(DeletionVectorStruct toCopy) { super(toCopy); this.location = toCopy.location; diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index a8624aad15c1..0662c6f8c706 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -63,6 +63,12 @@ class TrackingStruct extends SupportsIndexProjection implements Tracking, Serial super(BASE_TYPE.fields().size()); } + TrackingStruct(EntryStatus status, Long snapshotId) { + super(BASE_TYPE.fields().size()); + this.status = status; + this.snapshotId = snapshotId; + } + private TrackingStruct(TrackingStruct toCopy) { super(toCopy); this.status = toCopy.status; diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 5ab6b1f3586c..9fa850032da4 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -28,12 +28,8 @@ class TestDeletionVectorStruct { @Test void testFieldAccess() { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); assertThat(dv.location()).isEqualTo("s3://bucket/data/dv.puffin"); assertThat(dv.offset()).isEqualTo(256L); @@ -43,12 +39,8 @@ void testFieldAccess() { @Test void testCopy() { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); DeletionVectorStruct copy = dv.copy(); @@ -86,11 +78,8 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); DeletionVectorStruct deserialized = TestHelpers.roundTripSerialize(dv); @@ -102,11 +91,8 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/data/dv.puffin"); - dv.set(1, 256L); - dv.set(2, 128L); - dv.set(3, 42L); + DeletionVectorStruct dv = + new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); DeletionVectorStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(dv); diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 05013ae54e79..c3ac314c08fe 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -33,18 +33,10 @@ class TestTrackedFileStruct { @Test void testFieldAccess() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(); - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 42L); + DeletionVectorStruct dv = new DeletionVectorStruct("s3://bucket/dv.puffin", 100L, 50L, 5L); ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - - dv.set(0, "s3://bucket/dv.puffin"); - dv.set(1, 100L); - dv.set(2, 50L); - dv.set(3, 5L); - info.set(0, 10); info.set(1, 20); info.set(2, 3); @@ -90,8 +82,7 @@ void testFieldAccess() { void testReaderSideFields() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); tracking.setManifestLocation("s3://bucket/metadata/manifest.avro"); tracking.set(8, 7L); @@ -279,18 +270,12 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = new TrackingStruct(); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 42L); tracking.set(2, 10L); tracking.setManifestLocation("s3://bucket/manifest.avro"); tracking.set(8, 3L); - DeletionVectorStruct dv = new DeletionVectorStruct(DeletionVector.schema()); - dv.set(0, "s3://bucket/dv.puffin"); - dv.set(1, 100L); - dv.set(2, 50L); - dv.set(3, 5L); + DeletionVectorStruct dv = new DeletionVectorStruct("s3://bucket/dv.puffin", 100L, 50L, 5L); TrackedFileStruct file = new TrackedFileStruct( diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 5af41d0dcf02..15300f4d3db5 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -98,8 +98,7 @@ void testIsLive() { @Test void testInheritSnapshotId() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); tracking.inheritFrom(createManifestTracking(100L, 60L)); // snapshotId is null, should inherit from manifest @@ -108,8 +107,7 @@ void testInheritSnapshotId() { @Test void testInheritSequenceNumberForAddedEntries() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are null and status is ADDED, should inherit @@ -119,8 +117,7 @@ void testInheritSequenceNumberForAddedEntries() { @Test void testDoNotInheritSequenceNumberForExistingEntries() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.EXISTING.id()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.EXISTING, null); tracking.set(2, 5L); tracking.set(3, 6L); tracking.inheritFrom(createManifestTracking(100L, 60L)); @@ -132,9 +129,7 @@ void testDoNotInheritSequenceNumberForExistingEntries() { @Test void testExplicitValuesOverrideInheritance() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 200L); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 200L); tracking.set(2, 75L); tracking.set(3, 76L); tracking.inheritFrom(createManifestTracking(100L, 60L)); @@ -163,8 +158,7 @@ void testInheritFromRejectsUnequalSequenceNumbers() { @Test void testNoDefaultingWithoutInheritance() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); // no inheritance, nulls stay null assertThat(tracking.snapshotId()).isNull(); @@ -173,9 +167,7 @@ void testNoDefaultingWithoutInheritance() { } private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, snapshotId); + TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, snapshotId); tracking.set(2, sequenceNumber); tracking.set(3, sequenceNumber); return tracking; From 7ee9241622ce07a17981dde06b41e18264a317fe Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Fri, 24 Apr 2026 08:33:12 -0700 Subject: [PATCH 02/10] Use builders --- .../apache/iceberg/DeletionVectorStruct.java | 50 +++++-- .../apache/iceberg/ManifestInfoStruct.java | 105 +++++++++++++++ .../org/apache/iceberg/TrackingStruct.java | 76 +++++++++-- .../iceberg/TestDeletionVectorStruct.java | 41 +++++- .../iceberg/TestManifestInfoStruct.java | 125 +++++++++++------- .../apache/iceberg/TestTrackedFileStruct.java | 52 +++++--- .../apache/iceberg/TestTrackingStruct.java | 79 +++++++---- 7 files changed, 412 insertions(+), 116 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 65da357453ad..4ca4e57f8b97 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; /** Mutable {@link StructLike} implementation of {@link DeletionVector}. */ @@ -41,14 +42,6 @@ class DeletionVectorStruct extends SupportsIndexProjection implements DeletionVe super(BASE_TYPE, type); } - DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { - super(BASE_TYPE.fields().size()); - this.location = location; - this.offset = offset; - this.sizeInBytes = sizeInBytes; - this.cardinality = cardinality; - } - private DeletionVectorStruct(DeletionVectorStruct toCopy) { super(toCopy); this.location = toCopy.location; @@ -132,4 +125,45 @@ public String toString() { .add("cardinality", cardinality) .toString(); } + + static class Builder { + private String location = null; + private long offset = -1L; + private long sizeInBytes = -1L; + private long cardinality = -1L; + + Builder location(String dvLocation) { + this.location = dvLocation; + return this; + } + + Builder offset(long dvOffset) { + this.offset = dvOffset; + return this; + } + + Builder sizeInBytes(long dvSizeInBytes) { + this.sizeInBytes = dvSizeInBytes; + return this; + } + + Builder cardinality(long dvCardinality) { + this.cardinality = dvCardinality; + return this; + } + + DeletionVectorStruct build() { + Preconditions.checkArgument(location != null, "Invalid location: null"); + Preconditions.checkArgument(offset >= 0, "Invalid offset: %s", offset); + Preconditions.checkArgument(sizeInBytes >= 0, "Invalid size in bytes: %s", sizeInBytes); + Preconditions.checkArgument(cardinality >= 0, "Invalid cardinality: %s", cardinality); + + DeletionVectorStruct struct = new DeletionVectorStruct(BASE_TYPE); + struct.location = location; + struct.offset = offset; + struct.sizeInBytes = sizeInBytes; + struct.cardinality = cardinality; + return struct; + } + } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 8f51df749e33..613630a05faf 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -23,6 +23,7 @@ import java.util.Arrays; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; @@ -224,4 +225,108 @@ public String toString() { .add("dv_cardinality", dvCardinality == null ? "null" : dvCardinality) .toString(); } + + static class Builder { + private int addedFilesCount = -1; + private int existingFilesCount = -1; + private int deletedFilesCount = -1; + private int replacedFilesCount = -1; + private long addedRowsCount = -1L; + private long existingRowsCount = -1L; + private long deletedRowsCount = -1L; + private long replacedRowsCount = -1L; + private long minSequenceNumber = -1L; + private byte[] dv = null; + private Long dvCardinality = null; + + Builder addedFilesCount(int count) { + this.addedFilesCount = count; + return this; + } + + Builder existingFilesCount(int count) { + this.existingFilesCount = count; + return this; + } + + Builder deletedFilesCount(int count) { + this.deletedFilesCount = count; + return this; + } + + Builder replacedFilesCount(int count) { + this.replacedFilesCount = count; + return this; + } + + Builder addedRowsCount(long count) { + this.addedRowsCount = count; + return this; + } + + Builder existingRowsCount(long count) { + this.existingRowsCount = count; + return this; + } + + Builder deletedRowsCount(long count) { + this.deletedRowsCount = count; + return this; + } + + Builder replacedRowsCount(long count) { + this.replacedRowsCount = count; + return this; + } + + Builder minSequenceNumber(long sequenceNumber) { + this.minSequenceNumber = sequenceNumber; + return this; + } + + Builder dv(ByteBuffer buffer) { + this.dv = buffer != null ? ByteBuffers.toByteArray(buffer) : null; + return this; + } + + Builder dvCardinality(Long cardinality) { + this.dvCardinality = cardinality; + return this; + } + + ManifestInfoStruct build() { + Preconditions.checkArgument( + addedFilesCount >= 0, "Invalid added files count: %s", addedFilesCount); + Preconditions.checkArgument( + existingFilesCount >= 0, "Invalid existing files count: %s", existingFilesCount); + Preconditions.checkArgument( + deletedFilesCount >= 0, "Invalid deleted files count: %s", deletedFilesCount); + Preconditions.checkArgument( + replacedFilesCount >= 0, "Invalid replaced files count: %s", replacedFilesCount); + Preconditions.checkArgument( + addedRowsCount >= 0, "Invalid added rows count: %s", addedRowsCount); + Preconditions.checkArgument( + existingRowsCount >= 0, "Invalid existing rows count: %s", existingRowsCount); + Preconditions.checkArgument( + deletedRowsCount >= 0, "Invalid deleted rows count: %s", deletedRowsCount); + Preconditions.checkArgument( + replacedRowsCount >= 0, "Invalid replaced rows count: %s", replacedRowsCount); + Preconditions.checkArgument( + minSequenceNumber >= 0, "Invalid min sequence number: %s", minSequenceNumber); + + ManifestInfoStruct struct = new ManifestInfoStruct(BASE_TYPE); + struct.addedFilesCount = addedFilesCount; + struct.existingFilesCount = existingFilesCount; + struct.deletedFilesCount = deletedFilesCount; + struct.replacedFilesCount = replacedFilesCount; + struct.addedRowsCount = addedRowsCount; + struct.existingRowsCount = existingRowsCount; + struct.deletedRowsCount = deletedRowsCount; + struct.replacedRowsCount = replacedRowsCount; + struct.minSequenceNumber = minSequenceNumber; + struct.dv = dv; + struct.dvCardinality = dvCardinality; + return struct; + } + } } diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index 0662c6f8c706..06529929b945 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -59,16 +59,6 @@ class TrackingStruct extends SupportsIndexProjection implements Tracking, Serial super(BASE_TYPE, type); } - TrackingStruct() { - super(BASE_TYPE.fields().size()); - } - - TrackingStruct(EntryStatus status, Long snapshotId) { - super(BASE_TYPE.fields().size()); - this.status = status; - this.snapshotId = snapshotId; - } - private TrackingStruct(TrackingStruct toCopy) { super(toCopy); this.status = toCopy.status; @@ -252,4 +242,70 @@ public String toString() { .add("replaced_positions", replacedPositions == null ? "null" : "(binary)") .toString(); } + + static class Builder { + private EntryStatus status = null; + private Long snapshotId = null; + private Long dataSequenceNumber = null; + private Long fileSequenceNumber = null; + private Long dvSnapshotId = null; + private Long firstRowId = null; + private byte[] deletedPositions = null; + private byte[] replacedPositions = null; + + Builder status(EntryStatus entryStatus) { + this.status = entryStatus; + return this; + } + + Builder snapshotId(Long id) { + this.snapshotId = id; + return this; + } + + Builder dataSequenceNumber(Long sequenceNumber) { + this.dataSequenceNumber = sequenceNumber; + return this; + } + + Builder fileSequenceNumber(Long sequenceNumber) { + this.fileSequenceNumber = sequenceNumber; + return this; + } + + Builder dvSnapshotId(Long id) { + this.dvSnapshotId = id; + return this; + } + + Builder firstRowId(Long rowId) { + this.firstRowId = rowId; + return this; + } + + Builder deletedPositions(ByteBuffer positions) { + this.deletedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; + return this; + } + + Builder replacedPositions(ByteBuffer positions) { + this.replacedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; + return this; + } + + TrackingStruct build() { + Preconditions.checkArgument(status != null, "Invalid status: null"); + + TrackingStruct struct = new TrackingStruct(BASE_TYPE); + struct.status = status; + struct.snapshotId = snapshotId; + struct.dataSequenceNumber = dataSequenceNumber; + struct.fileSequenceNumber = fileSequenceNumber; + struct.dvSnapshotId = dvSnapshotId; + struct.firstRowId = firstRowId; + struct.deletedPositions = deletedPositions; + struct.replacedPositions = replacedPositions; + return struct; + } + } } diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 9fa850032da4..d5d756d272fd 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import org.apache.iceberg.types.Types; @@ -29,7 +30,12 @@ class TestDeletionVectorStruct { @Test void testFieldAccess() { DeletionVectorStruct dv = - new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); + new DeletionVectorStruct.Builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); assertThat(dv.location()).isEqualTo("s3://bucket/data/dv.puffin"); assertThat(dv.offset()).isEqualTo(256L); @@ -40,7 +46,12 @@ void testFieldAccess() { @Test void testCopy() { DeletionVectorStruct dv = - new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); + new DeletionVectorStruct.Builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct copy = dv.copy(); @@ -79,7 +90,12 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { DeletionVectorStruct dv = - new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); + new DeletionVectorStruct.Builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct deserialized = TestHelpers.roundTripSerialize(dv); @@ -92,7 +108,12 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { DeletionVectorStruct dv = - new DeletionVectorStruct("s3://bucket/data/dv.puffin", 256L, 128L, 42L); + new DeletionVectorStruct.Builder() + .location("s3://bucket/data/dv.puffin") + .offset(256L) + .sizeInBytes(128L) + .cardinality(42L) + .build(); DeletionVectorStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(dv); @@ -101,4 +122,16 @@ void testKryoSerializationRoundTrip() throws IOException { assertThat(deserialized.sizeInBytes()).isEqualTo(128L); assertThat(deserialized.cardinality()).isEqualTo(42L); } + + @Test + void testBuildValidatesRequiredFields() { + assertThatThrownBy(() -> new DeletionVectorStruct.Builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy( + () -> new DeletionVectorStruct.Builder().location("s3://bucket/dv.puffin").build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid offset: -1"); + } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 23917de9cd40..8869c08382bf 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; @@ -58,19 +59,20 @@ void testFieldAccess() { @Test void testCopy() { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + new ManifestInfoStruct.Builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dvCardinality(1L) + .build(); ManifestInfoStruct copy = info.copy(); @@ -91,17 +93,18 @@ void testCopy() { @Test void testNullableFields() { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - info.set(0, 0); - info.set(1, 0); - info.set(2, 0); - info.set(3, 0); - info.set(4, 0L); - info.set(5, 0L); - info.set(6, 0L); - info.set(7, 0L); - info.set(8, 0L); + ManifestInfoStruct info = + new ManifestInfoStruct.Builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build(); assertThat(info.dv()).isNull(); assertThat(info.dvCardinality()).isNull(); @@ -129,18 +132,20 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + new ManifestInfoStruct.Builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dvCardinality(1L) + .build(); ManifestInfoStruct deserialized = TestHelpers.roundTripSerialize(info); @@ -159,18 +164,20 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + ManifestInfoStruct info = + new ManifestInfoStruct.Builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dvCardinality(1L) + .build(); ManifestInfoStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(info); @@ -186,4 +193,26 @@ void testKryoSerializationRoundTrip() throws IOException { assertThat(deserialized.dv()).isEqualTo(ByteBuffer.wrap(new byte[] {0xF})); assertThat(deserialized.dvCardinality()).isEqualTo(1L); } + + @Test + void testBuildValidatesRequiredFields() { + assertThatThrownBy(() -> new ManifestInfoStruct.Builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid added files count: -1"); + + assertThatThrownBy( + () -> + new ManifestInfoStruct.Builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid min sequence number: -1"); + } } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index c3ac314c08fe..e4d41afd20b6 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -33,19 +33,27 @@ class TestTrackedFileStruct { @Test void testFieldAccess() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 42L); - DeletionVectorStruct dv = new DeletionVectorStruct("s3://bucket/dv.puffin", 100L, 50L, 5L); - ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); + TrackingStruct tracking = + new TrackingStruct.Builder().status(EntryStatus.ADDED).snapshotId(42L).build(); + DeletionVectorStruct dv = + new DeletionVectorStruct.Builder() + .location("s3://bucket/dv.puffin") + .offset(100L) + .sizeInBytes(50L) + .cardinality(5L) + .build(); + ManifestInfoStruct info = + new ManifestInfoStruct.Builder() + .addedFilesCount(10) + .existingFilesCount(20) + .deletedFilesCount(3) + .replacedFilesCount(2) + .addedRowsCount(1000L) + .existingRowsCount(2000L) + .deletedRowsCount(300L) + .replacedRowsCount(200L) + .minSequenceNumber(5L) + .build(); file.set(0, tracking); file.set(1, FileContent.EQUALITY_DELETES.id()); @@ -82,7 +90,7 @@ void testFieldAccess() { void testReaderSideFields() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); + TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); tracking.setManifestLocation("s3://bucket/metadata/manifest.avro"); tracking.set(8, 7L); @@ -270,12 +278,22 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 42L); - tracking.set(2, 10L); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .build(); tracking.setManifestLocation("s3://bucket/manifest.avro"); tracking.set(8, 3L); - DeletionVectorStruct dv = new DeletionVectorStruct("s3://bucket/dv.puffin", 100L, 50L, 5L); + DeletionVectorStruct dv = + new DeletionVectorStruct.Builder() + .location("s3://bucket/dv.puffin") + .offset(100L) + .sizeInBytes(50L) + .cardinality(5L) + .build(); TrackedFileStruct file = new TrackedFileStruct( diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 15300f4d3db5..67e10c277e86 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -53,12 +53,13 @@ void testFieldAccess() { @Test void testCopy() { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .build(); TrackingStruct copy = tracking.copy(); @@ -98,7 +99,7 @@ void testIsLive() { @Test void testInheritSnapshotId() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); + TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // snapshotId is null, should inherit from manifest @@ -107,7 +108,7 @@ void testInheritSnapshotId() { @Test void testInheritSequenceNumberForAddedEntries() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); + TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are null and status is ADDED, should inherit @@ -117,9 +118,12 @@ void testInheritSequenceNumberForAddedEntries() { @Test void testDoNotInheritSequenceNumberForExistingEntries() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.EXISTING, null); - tracking.set(2, 5L); - tracking.set(3, 6L); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.EXISTING) + .dataSequenceNumber(5L) + .fileSequenceNumber(6L) + .build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are not inherited for EXISTING entries @@ -129,9 +133,13 @@ void testDoNotInheritSequenceNumberForExistingEntries() { @Test void testExplicitValuesOverrideInheritance() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, 200L); - tracking.set(2, 75L); - tracking.set(3, 76L); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(200L) + .dataSequenceNumber(75L) + .fileSequenceNumber(76L) + .build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // explicit values should take precedence @@ -158,7 +166,7 @@ void testInheritFromRejectsUnequalSequenceNumbers() { @Test void testNoDefaultingWithoutInheritance() { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, null); + TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); // no inheritance, nulls stay null assertThat(tracking.snapshotId()).isNull(); @@ -166,11 +174,20 @@ void testNoDefaultingWithoutInheritance() { assertThat(tracking.fileSequenceNumber()).isNull(); } + @Test + void testBuildValidatesRequiredFields() { + assertThatThrownBy(() -> new TrackingStruct.Builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status: null"); + } + private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { - TrackingStruct tracking = new TrackingStruct(EntryStatus.ADDED, snapshotId); - tracking.set(2, sequenceNumber); - tracking.set(3, sequenceNumber); - return tracking; + return new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(snapshotId) + .dataSequenceNumber(sequenceNumber) + .fileSequenceNumber(sequenceNumber) + .build(); } @Test @@ -194,11 +211,13 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .build(); TrackingStruct deserialized = TestHelpers.roundTripSerialize(tracking); @@ -210,11 +229,13 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { - TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(6, ByteBuffer.wrap(new byte[] {1, 2})); + TrackingStruct tracking = + new TrackingStruct.Builder() + .status(EntryStatus.ADDED) + .snapshotId(42L) + .dataSequenceNumber(10L) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .build(); TrackingStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(tracking); From 2da1af6d330f56611e0910fa4c6e6d80acf06c77 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Fri, 24 Apr 2026 08:38:13 -0700 Subject: [PATCH 03/10] Remove the validation to be consistent with BaseFile --- .../apache/iceberg/DeletionVectorStruct.java | 6 ----- .../apache/iceberg/ManifestInfoStruct.java | 20 ---------------- .../org/apache/iceberg/TrackingStruct.java | 2 -- .../iceberg/TestDeletionVectorStruct.java | 13 ----------- .../iceberg/TestManifestInfoStruct.java | 23 ------------------- .../apache/iceberg/TestTrackingStruct.java | 7 ------ 6 files changed, 71 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 4ca4e57f8b97..e8cd50d42bbe 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -21,7 +21,6 @@ import java.io.Serializable; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; /** Mutable {@link StructLike} implementation of {@link DeletionVector}. */ @@ -153,11 +152,6 @@ Builder cardinality(long dvCardinality) { } DeletionVectorStruct build() { - Preconditions.checkArgument(location != null, "Invalid location: null"); - Preconditions.checkArgument(offset >= 0, "Invalid offset: %s", offset); - Preconditions.checkArgument(sizeInBytes >= 0, "Invalid size in bytes: %s", sizeInBytes); - Preconditions.checkArgument(cardinality >= 0, "Invalid cardinality: %s", cardinality); - DeletionVectorStruct struct = new DeletionVectorStruct(BASE_TYPE); struct.location = location; struct.offset = offset; diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 613630a05faf..01d6d48ba5fe 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -23,7 +23,6 @@ import java.util.Arrays; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; @@ -295,25 +294,6 @@ Builder dvCardinality(Long cardinality) { } ManifestInfoStruct build() { - Preconditions.checkArgument( - addedFilesCount >= 0, "Invalid added files count: %s", addedFilesCount); - Preconditions.checkArgument( - existingFilesCount >= 0, "Invalid existing files count: %s", existingFilesCount); - Preconditions.checkArgument( - deletedFilesCount >= 0, "Invalid deleted files count: %s", deletedFilesCount); - Preconditions.checkArgument( - replacedFilesCount >= 0, "Invalid replaced files count: %s", replacedFilesCount); - Preconditions.checkArgument( - addedRowsCount >= 0, "Invalid added rows count: %s", addedRowsCount); - Preconditions.checkArgument( - existingRowsCount >= 0, "Invalid existing rows count: %s", existingRowsCount); - Preconditions.checkArgument( - deletedRowsCount >= 0, "Invalid deleted rows count: %s", deletedRowsCount); - Preconditions.checkArgument( - replacedRowsCount >= 0, "Invalid replaced rows count: %s", replacedRowsCount); - Preconditions.checkArgument( - minSequenceNumber >= 0, "Invalid min sequence number: %s", minSequenceNumber); - ManifestInfoStruct struct = new ManifestInfoStruct(BASE_TYPE); struct.addedFilesCount = addedFilesCount; struct.existingFilesCount = existingFilesCount; diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index 06529929b945..c903f7349e0d 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -294,8 +294,6 @@ Builder replacedPositions(ByteBuffer positions) { } TrackingStruct build() { - Preconditions.checkArgument(status != null, "Invalid status: null"); - TrackingStruct struct = new TrackingStruct(BASE_TYPE); struct.status = status; struct.snapshotId = snapshotId; diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index d5d756d272fd..44b1ceacfb4a 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -19,7 +19,6 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import org.apache.iceberg.types.Types; @@ -122,16 +121,4 @@ void testKryoSerializationRoundTrip() throws IOException { assertThat(deserialized.sizeInBytes()).isEqualTo(128L); assertThat(deserialized.cardinality()).isEqualTo(42L); } - - @Test - void testBuildValidatesRequiredFields() { - assertThatThrownBy(() -> new DeletionVectorStruct.Builder().build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid location: null"); - - assertThatThrownBy( - () -> new DeletionVectorStruct.Builder().location("s3://bucket/dv.puffin").build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid offset: -1"); - } } diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 8869c08382bf..fea7652ba199 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -19,7 +19,6 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; @@ -193,26 +192,4 @@ void testKryoSerializationRoundTrip() throws IOException { assertThat(deserialized.dv()).isEqualTo(ByteBuffer.wrap(new byte[] {0xF})); assertThat(deserialized.dvCardinality()).isEqualTo(1L); } - - @Test - void testBuildValidatesRequiredFields() { - assertThatThrownBy(() -> new ManifestInfoStruct.Builder().build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid added files count: -1"); - - assertThatThrownBy( - () -> - new ManifestInfoStruct.Builder() - .addedFilesCount(0) - .existingFilesCount(0) - .deletedFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid min sequence number: -1"); - } } diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 67e10c277e86..f2dabbd6e79a 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -174,13 +174,6 @@ void testNoDefaultingWithoutInheritance() { assertThat(tracking.fileSequenceNumber()).isNull(); } - @Test - void testBuildValidatesRequiredFields() { - assertThatThrownBy(() -> new TrackingStruct.Builder().build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid status: null"); - } - private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { return new TrackingStruct.Builder() .status(EntryStatus.ADDED) From 6363aeff6ee75809581750ec5e68c5266880a45d Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 07:30:53 -0700 Subject: [PATCH 04/10] Feedback from Eduard --- .../apache/iceberg/DeletionVectorStruct.java | 19 +++++-- .../apache/iceberg/ManifestInfoStruct.java | 55 ++++++++++++++----- .../org/apache/iceberg/TrackingStruct.java | 43 +++++++++++---- .../iceberg/TestDeletionVectorStruct.java | 8 +-- .../iceberg/TestManifestInfoStruct.java | 8 +-- .../apache/iceberg/TestTrackedFileStruct.java | 12 ++-- .../apache/iceberg/TestTrackingStruct.java | 18 +++--- 7 files changed, 111 insertions(+), 52 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index e8cd50d42bbe..554a096a4141 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -49,6 +49,14 @@ private DeletionVectorStruct(DeletionVectorStruct toCopy) { this.cardinality = toCopy.cardinality; } + private DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { + super(BASE_TYPE, BASE_TYPE); + this.location = location; + this.offset = offset; + this.sizeInBytes = sizeInBytes; + this.cardinality = cardinality; + } + @Override public String location() { return location; @@ -115,6 +123,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -152,12 +164,7 @@ Builder cardinality(long dvCardinality) { } DeletionVectorStruct build() { - DeletionVectorStruct struct = new DeletionVectorStruct(BASE_TYPE); - struct.location = location; - struct.offset = offset; - struct.sizeInBytes = sizeInBytes; - struct.cardinality = cardinality; - return struct; + return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); } } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 01d6d48ba5fe..98446ad805d9 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -73,6 +73,32 @@ private ManifestInfoStruct(ManifestInfoStruct toCopy) { this.dvCardinality = toCopy.dvCardinality; } + private ManifestInfoStruct( + int addedFilesCount, + int existingFilesCount, + int deletedFilesCount, + int replacedFilesCount, + long addedRowsCount, + long existingRowsCount, + long deletedRowsCount, + long replacedRowsCount, + long minSequenceNumber, + byte[] dv, + Long dvCardinality) { + super(BASE_TYPE, BASE_TYPE); + this.addedFilesCount = addedFilesCount; + this.existingFilesCount = existingFilesCount; + this.deletedFilesCount = deletedFilesCount; + this.replacedFilesCount = replacedFilesCount; + this.addedRowsCount = addedRowsCount; + this.existingRowsCount = existingRowsCount; + this.deletedRowsCount = deletedRowsCount; + this.replacedRowsCount = replacedRowsCount; + this.minSequenceNumber = minSequenceNumber; + this.dv = dv; + this.dvCardinality = dvCardinality; + } + @Override public int addedFilesCount() { return addedFilesCount; @@ -208,6 +234,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -294,19 +324,18 @@ Builder dvCardinality(Long cardinality) { } ManifestInfoStruct build() { - ManifestInfoStruct struct = new ManifestInfoStruct(BASE_TYPE); - struct.addedFilesCount = addedFilesCount; - struct.existingFilesCount = existingFilesCount; - struct.deletedFilesCount = deletedFilesCount; - struct.replacedFilesCount = replacedFilesCount; - struct.addedRowsCount = addedRowsCount; - struct.existingRowsCount = existingRowsCount; - struct.deletedRowsCount = deletedRowsCount; - struct.replacedRowsCount = replacedRowsCount; - struct.minSequenceNumber = minSequenceNumber; - struct.dv = dv; - struct.dvCardinality = dvCardinality; - return struct; + return new ManifestInfoStruct( + addedFilesCount, + existingFilesCount, + deletedFilesCount, + replacedFilesCount, + addedRowsCount, + existingRowsCount, + deletedRowsCount, + replacedRowsCount, + minSequenceNumber, + dv, + dvCardinality); } } } diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index c903f7349e0d..a020e85962d7 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -79,6 +79,26 @@ private TrackingStruct(TrackingStruct toCopy) { this.manifestPos = toCopy.manifestPos; } + private TrackingStruct( + EntryStatus status, + Long snapshotId, + Long dataSequenceNumber, + Long fileSequenceNumber, + Long dvSnapshotId, + Long firstRowId, + byte[] deletedPositions, + byte[] replacedPositions) { + super(BASE_TYPE, BASE_TYPE); + this.status = status; + this.snapshotId = snapshotId; + this.dataSequenceNumber = dataSequenceNumber; + this.fileSequenceNumber = fileSequenceNumber; + this.dvSnapshotId = dvSnapshotId; + this.firstRowId = firstRowId; + this.deletedPositions = deletedPositions; + this.replacedPositions = replacedPositions; + } + void inheritFrom(Tracking manifestTracking) { if (manifestTracking != null) { if (snapshotId == null) { @@ -229,6 +249,10 @@ protected void internalSet(int pos, T value) { } } + static Builder builder() { + return new Builder(); + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -294,16 +318,15 @@ Builder replacedPositions(ByteBuffer positions) { } TrackingStruct build() { - TrackingStruct struct = new TrackingStruct(BASE_TYPE); - struct.status = status; - struct.snapshotId = snapshotId; - struct.dataSequenceNumber = dataSequenceNumber; - struct.fileSequenceNumber = fileSequenceNumber; - struct.dvSnapshotId = dvSnapshotId; - struct.firstRowId = firstRowId; - struct.deletedPositions = deletedPositions; - struct.replacedPositions = replacedPositions; - return struct; + return new TrackingStruct( + status, + snapshotId, + dataSequenceNumber, + fileSequenceNumber, + dvSnapshotId, + firstRowId, + deletedPositions, + replacedPositions); } } } diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 44b1ceacfb4a..998577f36838 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -29,7 +29,7 @@ class TestDeletionVectorStruct { @Test void testFieldAccess() { DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/data/dv.puffin") .offset(256L) .sizeInBytes(128L) @@ -45,7 +45,7 @@ void testFieldAccess() { @Test void testCopy() { DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/data/dv.puffin") .offset(256L) .sizeInBytes(128L) @@ -89,7 +89,7 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/data/dv.puffin") .offset(256L) .sizeInBytes(128L) @@ -107,7 +107,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/data/dv.puffin") .offset(256L) .sizeInBytes(128L) diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index fea7652ba199..0d65df2e9904 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -59,7 +59,7 @@ void testFieldAccess() { @Test void testCopy() { ManifestInfoStruct info = - new ManifestInfoStruct.Builder() + ManifestInfoStruct.builder() .addedFilesCount(10) .existingFilesCount(20) .deletedFilesCount(3) @@ -93,7 +93,7 @@ void testCopy() { @Test void testNullableFields() { ManifestInfoStruct info = - new ManifestInfoStruct.Builder() + ManifestInfoStruct.builder() .addedFilesCount(0) .existingFilesCount(0) .deletedFilesCount(0) @@ -132,7 +132,7 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { ManifestInfoStruct info = - new ManifestInfoStruct.Builder() + ManifestInfoStruct.builder() .addedFilesCount(10) .existingFilesCount(20) .deletedFilesCount(3) @@ -164,7 +164,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { ManifestInfoStruct info = - new ManifestInfoStruct.Builder() + ManifestInfoStruct.builder() .addedFilesCount(10) .existingFilesCount(20) .deletedFilesCount(3) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index e4d41afd20b6..62324e5607ef 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -34,16 +34,16 @@ class TestTrackedFileStruct { void testFieldAccess() { TrackedFileStruct file = new TrackedFileStruct(); TrackingStruct tracking = - new TrackingStruct.Builder().status(EntryStatus.ADDED).snapshotId(42L).build(); + TrackingStruct.builder().status(EntryStatus.ADDED).snapshotId(42L).build(); DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/dv.puffin") .offset(100L) .sizeInBytes(50L) .cardinality(5L) .build(); ManifestInfoStruct info = - new ManifestInfoStruct.Builder() + ManifestInfoStruct.builder() .addedFilesCount(10) .existingFilesCount(20) .deletedFilesCount(3) @@ -90,7 +90,7 @@ void testFieldAccess() { void testReaderSideFields() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.setManifestLocation("s3://bucket/metadata/manifest.avro"); tracking.set(8, 7L); @@ -279,7 +279,7 @@ void testKryoSerializationRoundTrip() throws IOException { static TrackedFileStruct createFullTrackedFile() { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) @@ -288,7 +288,7 @@ static TrackedFileStruct createFullTrackedFile() { tracking.set(8, 3L); DeletionVectorStruct dv = - new DeletionVectorStruct.Builder() + DeletionVectorStruct.builder() .location("s3://bucket/dv.puffin") .offset(100L) .sizeInBytes(50L) diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index f2dabbd6e79a..abdaa700dd11 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -54,7 +54,7 @@ void testFieldAccess() { @Test void testCopy() { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) @@ -99,7 +99,7 @@ void testIsLive() { @Test void testInheritSnapshotId() { - TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // snapshotId is null, should inherit from manifest @@ -108,7 +108,7 @@ void testInheritSnapshotId() { @Test void testInheritSequenceNumberForAddedEntries() { - TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are null and status is ADDED, should inherit @@ -119,7 +119,7 @@ void testInheritSequenceNumberForAddedEntries() { @Test void testDoNotInheritSequenceNumberForExistingEntries() { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.EXISTING) .dataSequenceNumber(5L) .fileSequenceNumber(6L) @@ -134,7 +134,7 @@ void testDoNotInheritSequenceNumberForExistingEntries() { @Test void testExplicitValuesOverrideInheritance() { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(200L) .dataSequenceNumber(75L) @@ -166,7 +166,7 @@ void testInheritFromRejectsUnequalSequenceNumbers() { @Test void testNoDefaultingWithoutInheritance() { - TrackingStruct tracking = new TrackingStruct.Builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); // no inheritance, nulls stay null assertThat(tracking.snapshotId()).isNull(); @@ -175,7 +175,7 @@ void testNoDefaultingWithoutInheritance() { } private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { - return new TrackingStruct.Builder() + return TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(snapshotId) .dataSequenceNumber(sequenceNumber) @@ -205,7 +205,7 @@ void testProjectedStructLike() { @Test void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) @@ -223,7 +223,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException @Test void testKryoSerializationRoundTrip() throws IOException { TrackingStruct tracking = - new TrackingStruct.Builder() + TrackingStruct.builder() .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) From 0cf56f631edfdab8ee61abd9ad27ff7c07b5a5f9 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Mon, 27 Apr 2026 07:56:07 -0700 Subject: [PATCH 05/10] Add validation --- .../apache/iceberg/DeletionVectorStruct.java | 5 + .../apache/iceberg/ManifestInfoStruct.java | 19 +++ .../org/apache/iceberg/TrackingStruct.java | 1 + .../iceberg/TestDeletionVectorStruct.java | 39 +++++ .../iceberg/TestManifestInfoStruct.java | 139 ++++++++++++++++++ .../apache/iceberg/TestTrackingStruct.java | 7 + 6 files changed, 210 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 554a096a4141..c42e6a59b5a1 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -21,6 +21,7 @@ import java.io.Serializable; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; /** Mutable {@link StructLike} implementation of {@link DeletionVector}. */ @@ -164,6 +165,10 @@ Builder cardinality(long dvCardinality) { } DeletionVectorStruct build() { + Preconditions.checkArgument(location != null, "Invalid location: null"); + Preconditions.checkArgument(offset >= 0, "Invalid offset: %s", offset); + Preconditions.checkArgument(sizeInBytes >= 0, "Invalid size in bytes: %s", sizeInBytes); + Preconditions.checkArgument(cardinality >= 0, "Invalid cardinality: %s", cardinality); return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 98446ad805d9..d3177eb1e2b2 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -23,6 +23,7 @@ import java.util.Arrays; import org.apache.iceberg.avro.SupportsIndexProjection; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.ByteBuffers; @@ -324,6 +325,24 @@ Builder dvCardinality(Long cardinality) { } ManifestInfoStruct build() { + Preconditions.checkArgument( + addedFilesCount >= 0, "Invalid added files count: %s", addedFilesCount); + Preconditions.checkArgument( + existingFilesCount >= 0, "Invalid existing files count: %s", existingFilesCount); + Preconditions.checkArgument( + deletedFilesCount >= 0, "Invalid deleted files count: %s", deletedFilesCount); + Preconditions.checkArgument( + replacedFilesCount >= 0, "Invalid replaced files count: %s", replacedFilesCount); + Preconditions.checkArgument( + addedRowsCount >= 0, "Invalid added rows count: %s", addedRowsCount); + Preconditions.checkArgument( + existingRowsCount >= 0, "Invalid existing rows count: %s", existingRowsCount); + Preconditions.checkArgument( + deletedRowsCount >= 0, "Invalid deleted rows count: %s", deletedRowsCount); + Preconditions.checkArgument( + replacedRowsCount >= 0, "Invalid replaced rows count: %s", replacedRowsCount); + Preconditions.checkArgument( + minSequenceNumber >= 0, "Invalid min sequence number: %s", minSequenceNumber); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index a020e85962d7..a658cd2bcd0f 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -318,6 +318,7 @@ Builder replacedPositions(ByteBuffer positions) { } TrackingStruct build() { + Preconditions.checkArgument(status != null, "Invalid status: null"); return new TrackingStruct( status, snapshotId, diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 998577f36838..effc03986b1c 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import org.apache.iceberg.types.Types; @@ -104,6 +105,44 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.cardinality()).isEqualTo(42L); } + @Test + void testBuilderValidation() { + assertThatThrownBy( + () -> DeletionVectorStruct.builder().offset(0).sizeInBytes(1).cardinality(1).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .sizeInBytes(1) + .cardinality(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid offset: -1"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(0) + .cardinality(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid size in bytes: -1"); + + assertThatThrownBy( + () -> + DeletionVectorStruct.builder() + .location("s3://bucket/dv.puffin") + .offset(0) + .sizeInBytes(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid cardinality: -1"); + } + @Test void testKryoSerializationRoundTrip() throws IOException { DeletionVectorStruct dv = diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 0d65df2e9904..4bb7910a9f4c 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -19,6 +19,7 @@ package org.apache.iceberg; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; import java.io.IOException; import java.nio.ByteBuffer; @@ -161,6 +162,144 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.dvCardinality()).isEqualTo(1L); } + @Test + void testBuilderValidation() { + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid added files count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid existing files count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted files count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced files count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid added rows count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid existing rows count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid deleted rows count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .minSequenceNumber(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid replaced rows count: -1"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid min sequence number: -1"); + } + @Test void testKryoSerializationRoundTrip() throws IOException { ManifestInfoStruct info = diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index abdaa700dd11..b6d702ca0a49 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -183,6 +183,13 @@ private static Tracking createManifestTracking(long snapshotId, long sequenceNum .build(); } + @Test + void testBuilderValidation() { + assertThatThrownBy(() -> TrackingStruct.builder().build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid status: null"); + } + @Test void testProjectedStructLike() { // project only snapshot_id (field ID 1) and first_row_id (field ID 142) From d453f43705e6aaa5b2413c7fa64371bd675f91ac Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 09:05:34 -0700 Subject: [PATCH 06/10] Feedback from Eduard --- .../apache/iceberg/DeletionVectorStruct.java | 8 +++-- .../apache/iceberg/ManifestInfoStruct.java | 35 ++++++++++++++----- .../org/apache/iceberg/TrackingStruct.java | 10 ++++++ .../iceberg/TestDeletionVectorStruct.java | 6 ++-- .../iceberg/TestManifestInfoStruct.java | 24 ++++++------- .../apache/iceberg/TestTrackingStruct.java | 6 ++-- 6 files changed, 59 insertions(+), 30 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index c42e6a59b5a1..0eb7c2fe1eb6 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -166,9 +166,11 @@ Builder cardinality(long dvCardinality) { DeletionVectorStruct build() { Preconditions.checkArgument(location != null, "Invalid location: null"); - Preconditions.checkArgument(offset >= 0, "Invalid offset: %s", offset); - Preconditions.checkArgument(sizeInBytes >= 0, "Invalid size in bytes: %s", sizeInBytes); - Preconditions.checkArgument(cardinality >= 0, "Invalid cardinality: %s", cardinality); + Preconditions.checkArgument(offset >= 0, "Invalid offset: %s (must be >= 0)", offset); + Preconditions.checkArgument( + sizeInBytes >= 0, "Invalid size in bytes: %s (must be >= 0)", sizeInBytes); + Preconditions.checkArgument( + cardinality >= 0, "Invalid cardinality: %s (must be >= 0)", cardinality); return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index d3177eb1e2b2..bfc0e4dba19a 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -319,6 +319,11 @@ Builder dv(ByteBuffer buffer) { return this; } + Builder dv(byte[] buffer) { + this.dv = buffer; + return this; + } + Builder dvCardinality(Long cardinality) { this.dvCardinality = cardinality; return this; @@ -326,23 +331,35 @@ Builder dvCardinality(Long cardinality) { ManifestInfoStruct build() { Preconditions.checkArgument( - addedFilesCount >= 0, "Invalid added files count: %s", addedFilesCount); + addedFilesCount >= 0, "Invalid added files count: %s (must be >= 0)", addedFilesCount); Preconditions.checkArgument( - existingFilesCount >= 0, "Invalid existing files count: %s", existingFilesCount); + existingFilesCount >= 0, + "Invalid existing files count: %s (must be >= 0)", + existingFilesCount); Preconditions.checkArgument( - deletedFilesCount >= 0, "Invalid deleted files count: %s", deletedFilesCount); + deletedFilesCount >= 0, + "Invalid deleted files count: %s (must be >= 0)", + deletedFilesCount); Preconditions.checkArgument( - replacedFilesCount >= 0, "Invalid replaced files count: %s", replacedFilesCount); + replacedFilesCount >= 0, + "Invalid replaced files count: %s (must be >= 0)", + replacedFilesCount); Preconditions.checkArgument( - addedRowsCount >= 0, "Invalid added rows count: %s", addedRowsCount); + addedRowsCount >= 0, "Invalid added rows count: %s (must be >= 0)", addedRowsCount); Preconditions.checkArgument( - existingRowsCount >= 0, "Invalid existing rows count: %s", existingRowsCount); + existingRowsCount >= 0, + "Invalid existing rows count: %s (must be >= 0)", + existingRowsCount); Preconditions.checkArgument( - deletedRowsCount >= 0, "Invalid deleted rows count: %s", deletedRowsCount); + deletedRowsCount >= 0, "Invalid deleted rows count: %s (must be >= 0)", deletedRowsCount); Preconditions.checkArgument( - replacedRowsCount >= 0, "Invalid replaced rows count: %s", replacedRowsCount); + replacedRowsCount >= 0, + "Invalid replaced rows count: %s (must be >= 0)", + replacedRowsCount); Preconditions.checkArgument( - minSequenceNumber >= 0, "Invalid min sequence number: %s", minSequenceNumber); + minSequenceNumber >= 0, + "Invalid min sequence number: %s (must be >= 0)", + minSequenceNumber); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index a658cd2bcd0f..65513c8d4a7c 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -312,11 +312,21 @@ Builder deletedPositions(ByteBuffer positions) { return this; } + Builder deletedPositions(byte[] positions) { + this.deletedPositions = positions; + return this; + } + Builder replacedPositions(ByteBuffer positions) { this.replacedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; return this; } + Builder replacedPositions(byte[] positions) { + this.replacedPositions = positions; + return this; + } + TrackingStruct build() { Preconditions.checkArgument(status != null, "Invalid status: null"); return new TrackingStruct( diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index effc03986b1c..325f9afd9ca9 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -120,7 +120,7 @@ void testBuilderValidation() { .cardinality(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid offset: -1"); + .hasMessage("Invalid offset: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -130,7 +130,7 @@ void testBuilderValidation() { .cardinality(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid size in bytes: -1"); + .hasMessage("Invalid size in bytes: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -140,7 +140,7 @@ void testBuilderValidation() { .sizeInBytes(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid cardinality: -1"); + .hasMessage("Invalid cardinality: -1 (must be >= 0)"); } @Test diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 4bb7910a9f4c..30a2ac4da8c5 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -70,7 +70,7 @@ void testCopy() { .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dv(new byte[] {0xF}) .dvCardinality(1L) .build(); @@ -143,7 +143,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dv(new byte[] {0xF}) .dvCardinality(1L) .build(); @@ -177,7 +177,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid added files count: -1"); + .hasMessage("Invalid added files count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -192,7 +192,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid existing files count: -1"); + .hasMessage("Invalid existing files count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -207,7 +207,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid deleted files count: -1"); + .hasMessage("Invalid deleted files count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -222,7 +222,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid replaced files count: -1"); + .hasMessage("Invalid replaced files count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -237,7 +237,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid added rows count: -1"); + .hasMessage("Invalid added rows count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -252,7 +252,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid existing rows count: -1"); + .hasMessage("Invalid existing rows count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -267,7 +267,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid deleted rows count: -1"); + .hasMessage("Invalid deleted rows count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -282,7 +282,7 @@ void testBuilderValidation() { .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid replaced rows count: -1"); + .hasMessage("Invalid replaced rows count: -1 (must be >= 0)"); assertThatThrownBy( () -> @@ -297,7 +297,7 @@ void testBuilderValidation() { .replacedRowsCount(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid min sequence number: -1"); + .hasMessage("Invalid min sequence number: -1 (must be >= 0)"); } @Test @@ -313,7 +313,7 @@ void testKryoSerializationRoundTrip() throws IOException { .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(ByteBuffer.wrap(new byte[] {0xF})) + .dv(new byte[] {0xF}) .dvCardinality(1L) .build(); diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index b6d702ca0a49..98a7eff2af45 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -58,7 +58,7 @@ void testCopy() { .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) - .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .deletedPositions(new byte[] {1, 2}) .build(); TrackingStruct copy = tracking.copy(); @@ -216,7 +216,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) - .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .deletedPositions(new byte[] {1, 2}) .build(); TrackingStruct deserialized = TestHelpers.roundTripSerialize(tracking); @@ -234,7 +234,7 @@ void testKryoSerializationRoundTrip() throws IOException { .status(EntryStatus.ADDED) .snapshotId(42L) .dataSequenceNumber(10L) - .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .deletedPositions(new byte[] {1, 2}) .build(); TrackingStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(tracking); From f802f85a7789ec1ef117601ace45305dcf4b6a4a Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Tue, 28 Apr 2026 17:22:03 -0700 Subject: [PATCH 07/10] Feedback from Steven --- .../apache/iceberg/ManifestInfoStruct.java | 3 ++ .../iceberg/TestManifestInfoStruct.java | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index bfc0e4dba19a..be99217f260a 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -360,6 +360,9 @@ ManifestInfoStruct build() { minSequenceNumber >= 0, "Invalid min sequence number: %s (must be >= 0)", minSequenceNumber); + Preconditions.checkArgument( + (dv == null) == (dvCardinality == null), + "dv and dvCardinality must both be null or both be non-null"); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 30a2ac4da8c5..2a5e69b22351 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -300,6 +300,43 @@ void testBuilderValidation() { .hasMessage("Invalid min sequence number: -1 (must be >= 0)"); } + @Test + void testBuilderDvPairingValidation() { + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dv(new byte[] {0xF}) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("dv and dvCardinality must both be null or both be non-null"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dvCardinality(1L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("dv and dvCardinality must both be null or both be non-null"); + } + @Test void testKryoSerializationRoundTrip() throws IOException { ManifestInfoStruct info = From 37f114a893a0dcf01f962e5b00dc72474b02fd96 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 29 Apr 2026 08:27:35 -0700 Subject: [PATCH 08/10] Update core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java Co-authored-by: Eduard Tudenhoefner --- core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index be99217f260a..2f5a04bfba0a 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -362,7 +362,7 @@ ManifestInfoStruct build() { minSequenceNumber); Preconditions.checkArgument( (dv == null) == (dvCardinality == null), - "dv and dvCardinality must both be null or both be non-null"); + "Invalid DV and cardinality: must both be null or non-null"); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, From 4cd8c8a4fe890ae489a46b1251530585ee0ad979 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 29 Apr 2026 08:59:29 -0700 Subject: [PATCH 09/10] Fix test --- .../test/java/org/apache/iceberg/TestManifestInfoStruct.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 2a5e69b22351..57146ca192cb 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -317,7 +317,7 @@ void testBuilderDvPairingValidation() { .dv(new byte[] {0xF}) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("dv and dvCardinality must both be null or both be non-null"); + .hasMessage("Invalid DV and cardinality: must both be null or non-null"); assertThatThrownBy( () -> @@ -334,7 +334,7 @@ void testBuilderDvPairingValidation() { .dvCardinality(1L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("dv and dvCardinality must both be null or both be non-null"); + .hasMessage("Invalid DV and cardinality: must both be null or non-null"); } @Test From 42173ee3c562d55e6ffcf8bead9e98d9b0662c96 Mon Sep 17 00:00:00 2001 From: Anoop Johnson Date: Wed, 29 Apr 2026 14:40:26 -0700 Subject: [PATCH 10/10] Feedback from Amogh --- .../org/apache/iceberg/ManifestInfoStruct.java | 4 ++++ .../apache/iceberg/TestManifestInfoStruct.java | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 2f5a04bfba0a..922047bffedd 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -363,6 +363,10 @@ ManifestInfoStruct build() { Preconditions.checkArgument( (dv == null) == (dvCardinality == null), "Invalid DV and cardinality: must both be null or non-null"); + Preconditions.checkArgument( + dvCardinality == null || dvCardinality > 0, + "Invalid DV cardinality: %s (must be positive)", + dvCardinality); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 57146ca192cb..3a694f1a38f2 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -335,6 +335,24 @@ void testBuilderDvPairingValidation() { .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid DV and cardinality: must both be null or non-null"); + + assertThatThrownBy( + () -> + ManifestInfoStruct.builder() + .addedFilesCount(0) + .existingFilesCount(0) + .deletedFilesCount(0) + .replacedFilesCount(0) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .dv(new byte[] {0xF}) + .dvCardinality(0L) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid DV cardinality: 0 (must be positive)"); } @Test