diff --git a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java index 0eb7c2fe1eb6..557ead1eaadb 100644 --- a/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java +++ b/core/src/main/java/org/apache/iceberg/DeletionVectorStruct.java @@ -51,7 +51,7 @@ private DeletionVectorStruct(DeletionVectorStruct toCopy) { } private DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { - super(BASE_TYPE, BASE_TYPE); + super(BASE_TYPE.fields().size()); this.location = location; this.offset = offset; this.sizeInBytes = sizeInBytes; @@ -140,37 +140,41 @@ public String toString() { static class Builder { private String location = null; - private long offset = -1L; - private long sizeInBytes = -1L; - private long cardinality = -1L; + private Long offset = null; + private Long sizeInBytes = null; + private Long cardinality = null; Builder location(String dvLocation) { + Preconditions.checkArgument(dvLocation != null, "Invalid location: null"); this.location = dvLocation; return this; } Builder offset(long dvOffset) { + Preconditions.checkArgument(dvOffset >= 0, "Invalid offset: %s (must be >= 0)", dvOffset); this.offset = dvOffset; return this; } Builder sizeInBytes(long dvSizeInBytes) { + Preconditions.checkArgument( + dvSizeInBytes >= 0, "Invalid size in bytes: %s (must be >= 0)", dvSizeInBytes); this.sizeInBytes = dvSizeInBytes; return this; } Builder cardinality(long dvCardinality) { + Preconditions.checkArgument( + dvCardinality > 0, "Invalid cardinality: %s (must be positive)", dvCardinality); this.cardinality = dvCardinality; return this; } DeletionVectorStruct build() { - Preconditions.checkArgument(location != null, "Invalid location: null"); - Preconditions.checkArgument(offset >= 0, "Invalid offset: %s (must be >= 0)", offset); - Preconditions.checkArgument( - sizeInBytes >= 0, "Invalid size in bytes: %s (must be >= 0)", sizeInBytes); - Preconditions.checkArgument( - cardinality >= 0, "Invalid cardinality: %s (must be >= 0)", cardinality); + Preconditions.checkArgument(location != null, "Missing required value: location"); + Preconditions.checkArgument(offset != null, "Missing required value: offset"); + Preconditions.checkArgument(sizeInBytes != null, "Missing required value: size in bytes"); + Preconditions.checkArgument(cardinality != null, "Missing required value: cardinality"); return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); } } diff --git a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java index 922047bffedd..528c57c34df6 100644 --- a/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java +++ b/core/src/main/java/org/apache/iceberg/ManifestInfoStruct.java @@ -86,7 +86,7 @@ private ManifestInfoStruct( long minSequenceNumber, byte[] dv, Long dvCardinality) { - super(BASE_TYPE, BASE_TYPE); + super(BASE_TYPE.fields().size()); this.addedFilesCount = addedFilesCount; this.existingFilesCount = existingFilesCount; this.deletedFilesCount = deletedFilesCount; @@ -257,116 +257,135 @@ public String toString() { } static class Builder { - private int addedFilesCount = -1; - private int existingFilesCount = -1; - private int deletedFilesCount = -1; - private int replacedFilesCount = -1; - private long addedRowsCount = -1L; - private long existingRowsCount = -1L; - private long deletedRowsCount = -1L; - private long replacedRowsCount = -1L; - private long minSequenceNumber = -1L; + private Integer addedFilesCount = null; + private Integer existingFilesCount = null; + private Integer deletedFilesCount = null; + private Integer replacedFilesCount = null; + private Long addedRowsCount = null; + private Long existingRowsCount = null; + private Long deletedRowsCount = null; + private Long replacedRowsCount = null; + private Long minSequenceNumber = null; private byte[] dv = null; private Long dvCardinality = null; Builder addedFilesCount(int count) { + Preconditions.checkArgument( + count >= 0, "Invalid added files count: %s (must be >= 0)", count); this.addedFilesCount = count; return this; } Builder existingFilesCount(int count) { + Preconditions.checkArgument( + count >= 0, "Invalid existing files count: %s (must be >= 0)", count); this.existingFilesCount = count; return this; } Builder deletedFilesCount(int count) { + Preconditions.checkArgument( + count >= 0, "Invalid deleted files count: %s (must be >= 0)", count); this.deletedFilesCount = count; return this; } Builder replacedFilesCount(int count) { + Preconditions.checkArgument( + count >= 0, "Invalid replaced files count: %s (must be >= 0)", count); this.replacedFilesCount = count; return this; } Builder addedRowsCount(long count) { + Preconditions.checkArgument(count >= 0, "Invalid added rows count: %s (must be >= 0)", count); this.addedRowsCount = count; return this; } Builder existingRowsCount(long count) { + Preconditions.checkArgument( + count >= 0, "Invalid existing rows count: %s (must be >= 0)", count); this.existingRowsCount = count; return this; } Builder deletedRowsCount(long count) { + Preconditions.checkArgument( + count >= 0, "Invalid deleted rows count: %s (must be >= 0)", count); this.deletedRowsCount = count; return this; } Builder replacedRowsCount(long count) { + Preconditions.checkArgument( + count >= 0, "Invalid replaced rows count: %s (must be >= 0)", count); this.replacedRowsCount = count; return this; } Builder minSequenceNumber(long sequenceNumber) { + Preconditions.checkArgument( + sequenceNumber >= 0, "Invalid min sequence number: %s (must be >= 0)", sequenceNumber); this.minSequenceNumber = sequenceNumber; return this; } Builder dv(ByteBuffer buffer) { - this.dv = buffer != null ? ByteBuffers.toByteArray(buffer) : null; - return this; - } - - Builder dv(byte[] buffer) { - this.dv = buffer; + Preconditions.checkArgument(buffer != null, "Invalid DV: null"); + this.dv = ByteBuffers.toByteArray(buffer); return this; } - Builder dvCardinality(Long cardinality) { + Builder dvCardinality(long cardinality) { + Preconditions.checkArgument( + cardinality > 0, "Invalid DV cardinality: %s (must be positive)", cardinality); this.dvCardinality = cardinality; return this; } ManifestInfoStruct build() { Preconditions.checkArgument( - addedFilesCount >= 0, "Invalid added files count: %s (must be >= 0)", addedFilesCount); + addedFilesCount != null, "Missing required value: added files count"); Preconditions.checkArgument( - existingFilesCount >= 0, - "Invalid existing files count: %s (must be >= 0)", - existingFilesCount); + existingFilesCount != null, "Missing required value: existing files count"); Preconditions.checkArgument( - deletedFilesCount >= 0, - "Invalid deleted files count: %s (must be >= 0)", - deletedFilesCount); + deletedFilesCount != null, "Missing required value: deleted files count"); Preconditions.checkArgument( - replacedFilesCount >= 0, - "Invalid replaced files count: %s (must be >= 0)", - replacedFilesCount); + replacedFilesCount != null, "Missing required value: replaced files count"); + Preconditions.checkArgument( + addedRowsCount != null, "Missing required value: added rows count"); + Preconditions.checkArgument( + existingRowsCount != null, "Missing required value: existing rows count"); + Preconditions.checkArgument( + deletedRowsCount != null, "Missing required value: deleted rows count"); Preconditions.checkArgument( - addedRowsCount >= 0, "Invalid added rows count: %s (must be >= 0)", addedRowsCount); + replacedRowsCount != null, "Missing required value: replaced rows count"); Preconditions.checkArgument( - existingRowsCount >= 0, - "Invalid existing rows count: %s (must be >= 0)", - existingRowsCount); + minSequenceNumber != null, "Missing required value: min sequence number"); Preconditions.checkArgument( - deletedRowsCount >= 0, "Invalid deleted rows count: %s (must be >= 0)", deletedRowsCount); + addedRowsCount == 0 || addedFilesCount > 0, + "Invalid added counts: %s rows in %s files", + addedRowsCount, + addedFilesCount); + Preconditions.checkArgument( + existingRowsCount == 0 || existingFilesCount > 0, + "Invalid existing counts: %s rows in %s files", + existingRowsCount, + existingFilesCount); Preconditions.checkArgument( - replacedRowsCount >= 0, - "Invalid replaced rows count: %s (must be >= 0)", - replacedRowsCount); + deletedRowsCount == 0 || deletedFilesCount > 0, + "Invalid deleted counts: %s rows in %s files", + deletedRowsCount, + deletedFilesCount); Preconditions.checkArgument( - minSequenceNumber >= 0, - "Invalid min sequence number: %s (must be >= 0)", - minSequenceNumber); + replacedRowsCount == 0 || replacedFilesCount > 0, + "Invalid replaced counts: %s rows in %s files", + replacedRowsCount, + replacedFilesCount); Preconditions.checkArgument( (dv == null) == (dvCardinality == null), "Invalid DV and cardinality: must both be null or non-null"); - Preconditions.checkArgument( - dvCardinality == null || dvCardinality > 0, - "Invalid DV cardinality: %s (must be positive)", - dvCardinality); return new ManifestInfoStruct( addedFilesCount, existingFilesCount, diff --git a/core/src/main/java/org/apache/iceberg/TrackingStruct.java b/core/src/main/java/org/apache/iceberg/TrackingStruct.java index 65513c8d4a7c..c950da854c2b 100644 --- a/core/src/main/java/org/apache/iceberg/TrackingStruct.java +++ b/core/src/main/java/org/apache/iceberg/TrackingStruct.java @@ -59,6 +59,11 @@ class TrackingStruct extends SupportsIndexProjection implements Tracking, Serial super(BASE_TYPE, type); } + /** Constructor for Java serialization. */ + TrackingStruct() { + super(BASE_TYPE.fields().size()); + } + private TrackingStruct(TrackingStruct toCopy) { super(toCopy); this.status = toCopy.status; @@ -88,7 +93,7 @@ private TrackingStruct( Long firstRowId, byte[] deletedPositions, byte[] replacedPositions) { - super(BASE_TYPE, BASE_TYPE); + super(BASE_TYPE.fields().size()); this.status = status; this.snapshotId = snapshotId; this.dataSequenceNumber = dataSequenceNumber; @@ -249,8 +254,24 @@ protected void internalSet(int pos, T value) { } } - static Builder builder() { - return new Builder(); + /** Creates a builder for a newly added file in the given snapshot. */ + static Builder added(long snapshotId) { + return new Builder(snapshotId); + } + + /** Creates a builder for an existing file based on tracking read from a manifest. */ + static Builder existing(Tracking source) { + return new Builder(source); + } + + /** Creates a builder for a deleted file in the given snapshot. */ + static Builder deleted(Tracking source, long snapshotId) { + return new Builder(EntryStatus.DELETED, source, snapshotId); + } + + /** Creates a builder for a replaced file in the given snapshot. */ + static Builder replaced(Tracking source, long snapshotId) { + return new Builder(EntryStatus.REPLACED, source, snapshotId); } @Override @@ -268,67 +289,120 @@ public String toString() { } static class Builder { - private EntryStatus status = null; - private Long snapshotId = null; - private Long dataSequenceNumber = null; - private Long fileSequenceNumber = null; - private Long dvSnapshotId = null; - private Long firstRowId = null; - private byte[] deletedPositions = null; - private byte[] replacedPositions = null; - - Builder status(EntryStatus entryStatus) { - this.status = entryStatus; - return this; + private final EntryStatus status; + private final Long snapshotId; + private final Long dataSequenceNumber; + private final Long fileSequenceNumber; + private final Long firstRowId; + private Long dvSnapshotId; + private byte[] deletedPositions; + private byte[] replacedPositions; + + private Builder(long snapshotId) { + this.status = EntryStatus.ADDED; + this.snapshotId = snapshotId; + this.dataSequenceNumber = null; + this.fileSequenceNumber = null; + this.firstRowId = null; + this.deletedPositions = null; + this.replacedPositions = null; } - Builder snapshotId(Long id) { - this.snapshotId = id; - return this; + private Builder(Tracking source) { + this(EntryStatus.EXISTING, source, source == null ? null : source.snapshotId()); } - Builder dataSequenceNumber(Long sequenceNumber) { - this.dataSequenceNumber = sequenceNumber; - return this; + private Builder(EntryStatus status, Tracking source, Long snapshotId) { + Preconditions.checkArgument(source != null, "Invalid source tracking: null"); + Preconditions.checkArgument( + source.dataSequenceNumber() != null, + "Invalid tracking source: data sequence number is null"); + Preconditions.checkArgument( + source.fileSequenceNumber() != null, + "Invalid tracking source: file sequence number is null"); + checkStatus(source.status(), status); + this.status = status; + this.snapshotId = snapshotId; + this.dataSequenceNumber = source.dataSequenceNumber(); + this.fileSequenceNumber = source.fileSequenceNumber(); + this.firstRowId = source.firstRowId(); + this.dvSnapshotId = source.dvSnapshotId(); + this.deletedPositions = null; + this.replacedPositions = null; } - Builder fileSequenceNumber(Long sequenceNumber) { - this.fileSequenceNumber = sequenceNumber; - return this; + // TODO: extend allowed transitions once MODIFIED status is added. + private static void checkStatus(EntryStatus from, EntryStatus to) { + Preconditions.checkState(from != null, "Invalid tracking source: status is null"); + switch (from) { + case ADDED: + Preconditions.checkState( + to == EntryStatus.EXISTING || to == EntryStatus.DELETED || to == EntryStatus.REPLACED, + "Invalid status transition: ADDED -> %s (ADDED is the starting status)", + to); + break; + case EXISTING: + Preconditions.checkState( + to == EntryStatus.EXISTING || to == EntryStatus.DELETED || to == EntryStatus.REPLACED, + "Invalid status transition: EXISTING -> %s", + to); + break; + case DELETED: + case REPLACED: + throw new IllegalStateException( + String.format( + "Invalid status transition: %s -> %s (%s is terminal)", from, to, from)); + default: + throw new IllegalStateException(String.format("Unknown source status: %s", from)); + } } - Builder dvSnapshotId(Long id) { - this.dvSnapshotId = id; - return this; - } + Builder dvSnapshotId(long id) { + // DV applies to data files; deleted/replaced positions apply to manifest files + Preconditions.checkState( + deletedPositions == null && replacedPositions == null, + "Cannot set DV snapshot ID on a manifest entry (deleted/replaced positions are set)"); + Preconditions.checkState( + status == EntryStatus.ADDED || status == EntryStatus.EXISTING, + "Cannot set DV snapshot ID on %s entry", + status); + if (status == EntryStatus.ADDED) { + Preconditions.checkArgument( + id == snapshotId, + "Invalid DV snapshot ID for ADDED entry: %s (must equal entry snapshot ID %s)", + id, + snapshotId); + } - Builder firstRowId(Long rowId) { - this.firstRowId = rowId; + this.dvSnapshotId = id; return this; } + // TODO: revisit when MODIFIED status is added; MDV setters will need to handle MODIFIED. Builder deletedPositions(ByteBuffer positions) { + Preconditions.checkState( + status == EntryStatus.EXISTING, "Cannot set deleted positions on %s entry", status); + // DV applies to data files; deleted positions apply to manifest files + Preconditions.checkState( + dvSnapshotId == null, + "Cannot set deleted positions on a data file entry (DV snapshot ID is set)"); this.deletedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; return this; } - Builder deletedPositions(byte[] positions) { - this.deletedPositions = positions; - return this; - } - + // TODO: revisit when MODIFIED status is added; MDV setters will need to handle MODIFIED. Builder replacedPositions(ByteBuffer positions) { + Preconditions.checkState( + status == EntryStatus.EXISTING, "Cannot set replaced positions on %s entry", status); + // DV applies to data files; replaced positions apply to manifest files + Preconditions.checkState( + dvSnapshotId == null, + "Cannot set replaced positions on a data file entry (DV snapshot ID is set)"); this.replacedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; return this; } - Builder replacedPositions(byte[] positions) { - this.replacedPositions = positions; - return this; - } - TrackingStruct build() { - Preconditions.checkArgument(status != null, "Invalid status: null"); return new TrackingStruct( status, snapshotId, diff --git a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java index 325f9afd9ca9..ee329d57d6ae 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestDeletionVectorStruct.java @@ -106,11 +106,11 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException } @Test - void testBuilderValidation() { + void testBuilderMissingRequiredFields() { assertThatThrownBy( () -> DeletionVectorStruct.builder().offset(0).sizeInBytes(1).cardinality(1).build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid location: null"); + .hasMessage("Missing required value: location"); assertThatThrownBy( () -> @@ -120,7 +120,7 @@ void testBuilderValidation() { .cardinality(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid offset: -1 (must be >= 0)"); + .hasMessage("Missing required value: offset"); assertThatThrownBy( () -> @@ -130,7 +130,7 @@ void testBuilderValidation() { .cardinality(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid size in bytes: -1 (must be >= 0)"); + .hasMessage("Missing required value: size in bytes"); assertThatThrownBy( () -> @@ -140,7 +140,26 @@ void testBuilderValidation() { .sizeInBytes(1) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid cardinality: -1 (must be >= 0)"); + .hasMessage("Missing required value: cardinality"); + } + + @Test + void testBuilderRejectsInvalidValuesAtSetter() { + assertThatThrownBy(() -> DeletionVectorStruct.builder().location(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid location: null"); + + assertThatThrownBy(() -> DeletionVectorStruct.builder().offset(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid offset: -1 (must be >= 0)"); + + assertThatThrownBy(() -> DeletionVectorStruct.builder().sizeInBytes(-1)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid size in bytes: -1 (must be >= 0)"); + + assertThatThrownBy(() -> DeletionVectorStruct.builder().cardinality(0)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid cardinality: 0 (must be positive)"); } @Test diff --git a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java index 3a694f1a38f2..6e7b69aade90 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestInfoStruct.java @@ -23,26 +23,57 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.stream.Stream; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; class TestManifestInfoStruct { + // Ordinals looked up from ManifestInfo.schema() so tests don't hard-code positions. + private static final List INFO_FIELDS = ManifestInfo.schema().fields(); + private static final int ADDED_FILES_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.ADDED_FILES_COUNT); + private static final int EXISTING_FILES_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.EXISTING_FILES_COUNT); + private static final int DELETED_FILES_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.DELETED_FILES_COUNT); + private static final int REPLACED_FILES_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.REPLACED_FILES_COUNT); + private static final int ADDED_ROWS_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.ADDED_ROWS_COUNT); + private static final int EXISTING_ROWS_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.EXISTING_ROWS_COUNT); + private static final int DELETED_ROWS_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.DELETED_ROWS_COUNT); + private static final int REPLACED_ROWS_COUNT_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.REPLACED_ROWS_COUNT); + private static final int MIN_SEQUENCE_NUMBER_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.MIN_SEQUENCE_NUMBER); + private static final int DV_ORDINAL = INFO_FIELDS.indexOf(ManifestInfo.DV); + private static final int DV_CARDINALITY_ORDINAL = + INFO_FIELDS.indexOf(ManifestInfo.DV_CARDINALITY); + @Test void testFieldAccess() { ManifestInfoStruct info = new ManifestInfoStruct(ManifestInfo.schema()); - info.set(0, 10); - info.set(1, 20); - info.set(2, 3); - info.set(3, 2); - info.set(4, 1000L); - info.set(5, 2000L); - info.set(6, 300L); - info.set(7, 200L); - info.set(8, 5L); - info.set(9, ByteBuffer.wrap(new byte[] {0xF})); - info.set(10, 1L); + info.set(ADDED_FILES_COUNT_ORDINAL, 10); + info.set(EXISTING_FILES_COUNT_ORDINAL, 20); + info.set(DELETED_FILES_COUNT_ORDINAL, 3); + info.set(REPLACED_FILES_COUNT_ORDINAL, 2); + info.set(ADDED_ROWS_COUNT_ORDINAL, 1000L); + info.set(EXISTING_ROWS_COUNT_ORDINAL, 2000L); + info.set(DELETED_ROWS_COUNT_ORDINAL, 300L); + info.set(REPLACED_ROWS_COUNT_ORDINAL, 200L); + info.set(MIN_SEQUENCE_NUMBER_ORDINAL, 5L); + info.set(DV_ORDINAL, ByteBuffer.wrap(new byte[] {0xF})); + info.set(DV_CARDINALITY_ORDINAL, 1L); assertThat(info.addedFilesCount()).isEqualTo(10); assertThat(info.existingFilesCount()).isEqualTo(20); @@ -70,7 +101,7 @@ void testCopy() { .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(new byte[] {0xF}) + .dv(ByteBuffer.wrap(new byte[] {0xF})) .dvCardinality(1L) .build(); @@ -143,7 +174,7 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(new byte[] {0xF}) + .dv(ByteBuffer.wrap(new byte[] {0xF})) .dvCardinality(1L) .build(); @@ -162,68 +193,85 @@ void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException assertThat(deserialized.dvCardinality()).isEqualTo(1L); } - @Test - void testBuilderValidation() { - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .existingFilesCount(0) - .deletedFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .minSequenceNumber(0L) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid added files count: -1 (must be >= 0)"); + // Keyed by the field name used in the "Missing required value: ..." build() error so each + // case has a single source of truth. No dependency on schema field order. + private static final Map> REQUIRED_SETTERS = + Map.ofEntries( + Map.entry("added files count", b -> b.addedFilesCount(0)), + Map.entry("existing files count", b -> b.existingFilesCount(0)), + Map.entry("deleted files count", b -> b.deletedFilesCount(0)), + Map.entry("replaced files count", b -> b.replacedFilesCount(0)), + Map.entry("added rows count", b -> b.addedRowsCount(0L)), + Map.entry("existing rows count", b -> b.existingRowsCount(0L)), + Map.entry("deleted rows count", b -> b.deletedRowsCount(0L)), + Map.entry("replaced rows count", b -> b.replacedRowsCount(0L)), + Map.entry("min sequence number", b -> b.minSequenceNumber(0L))); + + private static Stream missingRequiredFieldCases() { + return REQUIRED_SETTERS.keySet().stream(); + } - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .addedFilesCount(0) - .deletedFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .minSequenceNumber(0L) - .build()) + @ParameterizedTest + @MethodSource("missingRequiredFieldCases") + void testBuilderMissingRequiredFields(String missingField) { + ManifestInfoStruct.Builder builder = ManifestInfoStruct.builder(); + REQUIRED_SETTERS.forEach( + (name, setter) -> { + if (!name.equals(missingField)) { + setter.accept(builder); + } + }); + + assertThatThrownBy(builder::build) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid existing files count: -1 (must be >= 0)"); + .hasMessage("Missing required value: " + missingField); + } - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .addedFilesCount(0) - .existingFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .minSequenceNumber(0L) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid deleted files count: -1 (must be >= 0)"); + private static Stream negativeValueAtSetterCases() { + return Stream.of( + Arguments.of( + (Consumer) b -> b.addedFilesCount(-1), + "Invalid added files count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.existingFilesCount(-1), + "Invalid existing files count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.deletedFilesCount(-1), + "Invalid deleted files count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.replacedFilesCount(-1), + "Invalid replaced files count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.addedRowsCount(-1L), + "Invalid added rows count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.existingRowsCount(-1L), + "Invalid existing rows count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.deletedRowsCount(-1L), + "Invalid deleted rows count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.replacedRowsCount(-1L), + "Invalid replaced rows count: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.minSequenceNumber(-1L), + "Invalid min sequence number: -1 (must be >= 0)"), + Arguments.of( + (Consumer) b -> b.dvCardinality(0L), + "Invalid DV cardinality: 0 (must be positive)")); + } - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .addedFilesCount(0) - .existingFilesCount(0) - .deletedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .minSequenceNumber(0L) - .build()) + @ParameterizedTest + @MethodSource("negativeValueAtSetterCases") + void testBuilderRejectsNegativeValuesAtSetter( + Consumer apply, String expectedMessage) { + assertThatThrownBy(() -> apply.accept(ManifestInfoStruct.builder())) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid replaced files count: -1 (must be >= 0)"); + .hasMessage(expectedMessage); + } + @Test + void testBuilderRejectsRowsWithoutFiles() { assertThatThrownBy( () -> ManifestInfoStruct.builder() @@ -231,13 +279,14 @@ void testBuilderValidation() { .existingFilesCount(0) .deletedFilesCount(0) .replacedFilesCount(0) + .addedRowsCount(10L) .existingRowsCount(0L) .deletedRowsCount(0L) .replacedRowsCount(0L) .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid added rows count: -1 (must be >= 0)"); + .hasMessage("Invalid added counts: 10 rows in 0 files"); assertThatThrownBy( () -> @@ -247,12 +296,13 @@ void testBuilderValidation() { .deletedFilesCount(0) .replacedFilesCount(0) .addedRowsCount(0L) + .existingRowsCount(5L) .deletedRowsCount(0L) .replacedRowsCount(0L) .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid existing rows count: -1 (must be >= 0)"); + .hasMessage("Invalid existing counts: 5 rows in 0 files"); assertThatThrownBy( () -> @@ -263,11 +313,12 @@ void testBuilderValidation() { .replacedFilesCount(0) .addedRowsCount(0L) .existingRowsCount(0L) + .deletedRowsCount(3L) .replacedRowsCount(0L) .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid deleted rows count: -1 (must be >= 0)"); + .hasMessage("Invalid deleted counts: 3 rows in 0 files"); assertThatThrownBy( () -> @@ -279,25 +330,36 @@ void testBuilderValidation() { .addedRowsCount(0L) .existingRowsCount(0L) .deletedRowsCount(0L) + .replacedRowsCount(7L) .minSequenceNumber(0L) .build()) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid replaced rows count: -1 (must be >= 0)"); + .hasMessage("Invalid replaced counts: 7 rows in 0 files"); + } - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .addedFilesCount(0) - .existingFilesCount(0) - .deletedFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid min sequence number: -1 (must be >= 0)"); + @Test + void testBuilderAllowsFilesWithoutRows() { + ManifestInfoStruct info = + ManifestInfoStruct.builder() + .addedFilesCount(5) + .existingFilesCount(5) + .deletedFilesCount(5) + .replacedFilesCount(5) + .addedRowsCount(0L) + .existingRowsCount(0L) + .deletedRowsCount(0L) + .replacedRowsCount(0L) + .minSequenceNumber(0L) + .build(); + + assertThat(info.addedFilesCount()).isEqualTo(5); + assertThat(info.existingFilesCount()).isEqualTo(5); + assertThat(info.deletedFilesCount()).isEqualTo(5); + assertThat(info.replacedFilesCount()).isEqualTo(5); + assertThat(info.addedRowsCount()).isEqualTo(0L); + assertThat(info.existingRowsCount()).isEqualTo(0L); + assertThat(info.deletedRowsCount()).isEqualTo(0L); + assertThat(info.replacedRowsCount()).isEqualTo(0L); } @Test @@ -314,7 +376,7 @@ void testBuilderDvPairingValidation() { .deletedRowsCount(0L) .replacedRowsCount(0L) .minSequenceNumber(0L) - .dv(new byte[] {0xF}) + .dv(ByteBuffer.wrap(new byte[] {0xF})) .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid DV and cardinality: must both be null or non-null"); @@ -335,24 +397,6 @@ void testBuilderDvPairingValidation() { .build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid DV and cardinality: must both be null or non-null"); - - assertThatThrownBy( - () -> - ManifestInfoStruct.builder() - .addedFilesCount(0) - .existingFilesCount(0) - .deletedFilesCount(0) - .replacedFilesCount(0) - .addedRowsCount(0L) - .existingRowsCount(0L) - .deletedRowsCount(0L) - .replacedRowsCount(0L) - .minSequenceNumber(0L) - .dv(new byte[] {0xF}) - .dvCardinality(0L) - .build()) - .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid DV cardinality: 0 (must be positive)"); } @Test @@ -368,7 +412,7 @@ void testKryoSerializationRoundTrip() throws IOException { .deletedRowsCount(300L) .replacedRowsCount(200L) .minSequenceNumber(5L) - .dv(new byte[] {0xF}) + .dv(ByteBuffer.wrap(new byte[] {0xF})) .dvCardinality(1L) .build(); diff --git a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java index 3abb36aa51ff..f7f8e13834c7 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackedFileStruct.java @@ -35,11 +35,14 @@ class TestTrackedFileStruct { Types.NestedField.optional(1000, "id_bucket", Types.IntegerType.get()), Types.NestedField.optional(1001, "category", Types.StringType.get())); + // Ordinal of MetadataColumns.ROW_POSITION within TrackingStruct's BASE_TYPE, + // which appends ROW_POSITION after the Tracking schema fields. + private static final int MANIFEST_POS_ORDINAL = Tracking.schema().fields().size(); + @Test void testFieldAccess() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = - TrackingStruct.builder().status(EntryStatus.ADDED).snapshotId(42L).build(); + TrackingStruct tracking = TrackingStruct.added(42L).build(); DeletionVectorStruct dv = DeletionVectorStruct.builder() .location("s3://bucket/dv.puffin") @@ -98,9 +101,9 @@ void testFieldAccess() { void testReaderSideFields() { TrackedFileStruct file = new TrackedFileStruct(); - TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = TrackingStruct.added(42L).build(); tracking.setManifestLocation("s3://bucket/metadata/manifest.avro"); - tracking.set(8, 7L); + tracking.set(MANIFEST_POS_ORDINAL, 7L); file.set(0, tracking); file.set(1, FileContent.DATA.id()); @@ -328,14 +331,9 @@ void testKryoSerializationRoundTrip() throws IOException { } static TrackedFileStruct createFullTrackedFile() { - TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(42L) - .dataSequenceNumber(10L) - .build(); + TrackingStruct tracking = TrackingStruct.added(42L).build(); tracking.setManifestLocation("s3://bucket/manifest.avro"); - tracking.set(8, 3L); + tracking.set(MANIFEST_POS_ORDINAL, 3L); DeletionVectorStruct dv = DeletionVectorStruct.builder() diff --git a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java index 98a7eff2af45..f71b5edca4f0 100644 --- a/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java +++ b/core/src/test/java/org/apache/iceberg/TestTrackingStruct.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.List; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -30,16 +31,32 @@ class TestTrackingStruct { + // Ordinals looked up from Tracking.schema() so tests don't hard-code positions. + private static final List TRACKING_FIELDS = Tracking.schema().fields(); + private static final int STATUS_ORDINAL = TRACKING_FIELDS.indexOf(Tracking.STATUS); + private static final int SNAPSHOT_ID_ORDINAL = TRACKING_FIELDS.indexOf(Tracking.SNAPSHOT_ID); + private static final int DATA_SEQUENCE_NUMBER_ORDINAL = + TRACKING_FIELDS.indexOf(Tracking.SEQUENCE_NUMBER); + private static final int FILE_SEQUENCE_NUMBER_ORDINAL = + TRACKING_FIELDS.indexOf(Tracking.FILE_SEQUENCE_NUMBER); + private static final int DV_SNAPSHOT_ID_ORDINAL = + TRACKING_FIELDS.indexOf(Tracking.DV_SNAPSHOT_ID); + private static final int FIRST_ROW_ID_ORDINAL = TRACKING_FIELDS.indexOf(Tracking.FIRST_ROW_ID); + private static final int DELETED_POSITIONS_ORDINAL = + TRACKING_FIELDS.indexOf(Tracking.DELETED_POSITIONS); + private static final int REPLACED_POSITIONS_ORDINAL = + TRACKING_FIELDS.indexOf(Tracking.REPLACED_POSITIONS); + @Test void testFieldAccess() { TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); - tracking.set(1, 42L); - tracking.set(2, 10L); - tracking.set(3, 11L); - tracking.set(4, 43L); - tracking.set(5, 1000L); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, 42L); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, 11L); + tracking.set(DV_SNAPSHOT_ID_ORDINAL, 43L); + tracking.set(FIRST_ROW_ID_ORDINAL, 1000L); assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); assertThat(tracking.snapshotId()).isEqualTo(42L); @@ -54,29 +71,32 @@ void testFieldAccess() { @Test void testCopy() { TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(42L) - .dataSequenceNumber(10L) - .deletedPositions(new byte[] {1, 2}) + TrackingStruct.existing(manifestSourceTracking()) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .replacedPositions(ByteBuffer.wrap(new byte[] {3, 4})) .build(); TrackingStruct copy = tracking.copy(); - assertThat(copy.status()).isEqualTo(EntryStatus.ADDED); - assertThat(copy.snapshotId()).isEqualTo(42L); - assertThat(copy.dataSequenceNumber()).isEqualTo(10L); - assertThat(copy.deletedPositions()).isNotNull(); - - // verify deep copy of ByteBuffer - assertThat(copy.deletedPositions()).isNotSameAs(tracking.deletedPositions()); + assertThat(copy.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(copy.snapshotId()).isEqualTo(tracking.snapshotId()); + assertThat(copy.dataSequenceNumber()).isEqualTo(tracking.dataSequenceNumber()); + assertThat(copy.fileSequenceNumber()).isEqualTo(tracking.fileSequenceNumber()); + assertThat(copy.dvSnapshotId()).isNull(); + assertThat(copy.firstRowId()).isEqualTo(tracking.firstRowId()); + assertThat(copy.deletedPositions()).isEqualTo(tracking.deletedPositions()); + assertThat(copy.replacedPositions()).isEqualTo(tracking.replacedPositions()); + + // verify deep copy of ByteBuffer backing arrays + assertThat(copy.deletedPositions().array()).isNotSameAs(tracking.deletedPositions().array()); + assertThat(copy.replacedPositions().array()).isNotSameAs(tracking.replacedPositions().array()); } @ParameterizedTest @EnumSource(EntryStatus.class) void testAllStatuses(EntryStatus status) { TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, status.id()); + tracking.set(STATUS_ORDINAL, status.id()); assertThat(tracking.status()).isEqualTo(status); } @@ -84,22 +104,24 @@ void testAllStatuses(EntryStatus status) { void testIsLive() { TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); assertThat(tracking.isLive()).isTrue(); - tracking.set(0, EntryStatus.EXISTING.id()); + tracking.set(STATUS_ORDINAL, EntryStatus.EXISTING.id()); assertThat(tracking.isLive()).isTrue(); - tracking.set(0, EntryStatus.DELETED.id()); + tracking.set(STATUS_ORDINAL, EntryStatus.DELETED.id()); assertThat(tracking.isLive()).isFalse(); - tracking.set(0, EntryStatus.REPLACED.id()); + tracking.set(STATUS_ORDINAL, EntryStatus.REPLACED.id()); assertThat(tracking.isLive()).isFalse(); } @Test void testInheritSnapshotId() { - TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.inheritFrom(createManifestTracking(100L, 60L)); // snapshotId is null, should inherit from manifest @@ -108,7 +130,9 @@ void testInheritSnapshotId() { @Test void testInheritSequenceNumberForAddedEntries() { - TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are null and status is ADDED, should inherit @@ -118,12 +142,11 @@ void testInheritSequenceNumberForAddedEntries() { @Test void testDoNotInheritSequenceNumberForExistingEntries() { - TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.EXISTING) - .dataSequenceNumber(5L) - .fileSequenceNumber(6L) - .build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.EXISTING.id()); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, 5L); + tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, 6L); + tracking.inheritFrom(createManifestTracking(100L, 60L)); // sequence numbers are not inherited for EXISTING entries @@ -133,13 +156,12 @@ void testDoNotInheritSequenceNumberForExistingEntries() { @Test void testExplicitValuesOverrideInheritance() { - TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(200L) - .dataSequenceNumber(75L) - .fileSequenceNumber(76L) - .build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, 200L); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, 75L); + tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, 76L); + tracking.inheritFrom(createManifestTracking(100L, 60L)); // explicit values should take precedence @@ -151,13 +173,13 @@ void testExplicitValuesOverrideInheritance() { @Test void testInheritFromRejectsUnequalSequenceNumbers() { TrackingStruct tracking = new TrackingStruct(Tracking.schema()); - tracking.set(0, EntryStatus.ADDED.id()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); TrackingStruct manifestTracking = new TrackingStruct(Tracking.schema()); - manifestTracking.set(0, EntryStatus.ADDED.id()); - manifestTracking.set(1, 100L); - manifestTracking.set(2, 50L); - manifestTracking.set(3, 60L); + manifestTracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + manifestTracking.set(SNAPSHOT_ID_ORDINAL, 100L); + manifestTracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, 50L); + manifestTracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, 60L); assertThatThrownBy(() -> tracking.inheritFrom(manifestTracking)) .isInstanceOf(IllegalArgumentException.class) @@ -166,7 +188,8 @@ void testInheritFromRejectsUnequalSequenceNumbers() { @Test void testNoDefaultingWithoutInheritance() { - TrackingStruct tracking = TrackingStruct.builder().status(EntryStatus.ADDED).build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); // no inheritance, nulls stay null assertThat(tracking.snapshotId()).isNull(); @@ -175,19 +198,264 @@ void testNoDefaultingWithoutInheritance() { } private static Tracking createManifestTracking(long snapshotId, long sequenceNumber) { - return TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(snapshotId) - .dataSequenceNumber(sequenceNumber) - .fileSequenceNumber(sequenceNumber) - .build(); + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, snapshotId); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, sequenceNumber); + tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, sequenceNumber); + return tracking; } @Test - void testBuilderValidation() { - assertThatThrownBy(() -> TrackingStruct.builder().build()) + void testAddedBuilder() { + TrackingStruct tracking = TrackingStruct.added(42L).dvSnapshotId(42L).build(); + + assertThat(tracking.status()).isEqualTo(EntryStatus.ADDED); + assertThat(tracking.snapshotId()).isEqualTo(42L); + assertThat(tracking.dvSnapshotId()).isEqualTo(42L); + assertThat(tracking.deletedPositions()).isNull(); + assertThat(tracking.replacedPositions()).isNull(); + // sequence numbers and firstRowId remain null; populated by inheritance + assertThat(tracking.dataSequenceNumber()).isNull(); + assertThat(tracking.fileSequenceNumber()).isNull(); + assertThat(tracking.firstRowId()).isNull(); + } + + @Test + void testAddedBuilderRejectsMismatchedDvSnapshotId() { + assertThatThrownBy(() -> TrackingStruct.added(42L).dvSnapshotId(43L)) .isInstanceOf(IllegalArgumentException.class) - .hasMessage("Invalid status: null"); + .hasMessage("Invalid DV snapshot ID for ADDED entry: 43 (must equal entry snapshot ID 42)"); + } + + @Test + void testExistingBuilderPreservesSourceFields() { + Tracking source = sourceTracking(); + + TrackingStruct existing = TrackingStruct.existing(source).build(); + + assertThat(existing.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(existing.snapshotId()).isEqualTo(source.snapshotId()); + assertThat(existing.dataSequenceNumber()).isEqualTo(source.dataSequenceNumber()); + assertThat(existing.fileSequenceNumber()).isEqualTo(source.fileSequenceNumber()); + assertThat(existing.dvSnapshotId()).isEqualTo(source.dvSnapshotId()); + assertThat(existing.firstRowId()).isEqualTo(source.firstRowId()); + } + + @Test + void testDeletedBuilderUpdatesSnapshotIdAndPreservesRest() { + Tracking source = sourceTracking(); + + TrackingStruct deleted = TrackingStruct.deleted(source, 999L).build(); + + assertThat(deleted.status()).isEqualTo(EntryStatus.DELETED); + assertThat(deleted.snapshotId()).isEqualTo(999L); + assertThat(deleted.dataSequenceNumber()).isEqualTo(source.dataSequenceNumber()); + assertThat(deleted.fileSequenceNumber()).isEqualTo(source.fileSequenceNumber()); + assertThat(deleted.dvSnapshotId()).isEqualTo(source.dvSnapshotId()); + assertThat(deleted.firstRowId()).isEqualTo(source.firstRowId()); + } + + @Test + void testReplacedBuilderUpdatesSnapshotIdAndPreservesRest() { + Tracking source = sourceTracking(); + + TrackingStruct replaced = TrackingStruct.replaced(source, 999L).build(); + + assertThat(replaced.status()).isEqualTo(EntryStatus.REPLACED); + assertThat(replaced.snapshotId()).isEqualTo(999L); + assertThat(replaced.dataSequenceNumber()).isEqualTo(source.dataSequenceNumber()); + assertThat(replaced.fileSequenceNumber()).isEqualTo(source.fileSequenceNumber()); + assertThat(replaced.dvSnapshotId()).isEqualTo(source.dvSnapshotId()); + assertThat(replaced.firstRowId()).isEqualTo(source.firstRowId()); + } + + @Test + void testSourceDvPositionsAreNotCarriedForward() { + TrackingStruct source = sourceTracking(); + source.set(DELETED_POSITIONS_ORDINAL, ByteBuffer.wrap(new byte[] {1, 2})); + source.set(REPLACED_POSITIONS_ORDINAL, ByteBuffer.wrap(new byte[] {3, 4})); + + TrackingStruct existing = TrackingStruct.existing(source).build(); + assertThat(existing.deletedPositions()).isNull(); + assertThat(existing.replacedPositions()).isNull(); + + TrackingStruct deleted = TrackingStruct.deleted(source, 999L).build(); + assertThat(deleted.deletedPositions()).isNull(); + assertThat(deleted.replacedPositions()).isNull(); + + TrackingStruct replaced = TrackingStruct.replaced(source, 999L).build(); + assertThat(replaced.deletedPositions()).isNull(); + assertThat(replaced.replacedPositions()).isNull(); + } + + @Test + void testExistingBuilderAllowsDvMutation() { + TrackingStruct existing = TrackingStruct.existing(sourceTracking()).dvSnapshotId(999L).build(); + assertThat(existing.dvSnapshotId()).isEqualTo(999L); + } + + @Test + void testDeletedBuilderRejectsMutators() { + Tracking source = sourceTracking(); + + assertThatThrownBy(() -> TrackingStruct.deleted(source, 1L).dvSnapshotId(123L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set DV snapshot ID on DELETED entry"); + + assertThatThrownBy( + () -> + TrackingStruct.deleted(source, 1L) + .deletedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set deleted positions on DELETED entry"); + + assertThatThrownBy( + () -> + TrackingStruct.deleted(source, 1L) + .replacedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set replaced positions on DELETED entry"); + } + + @Test + void testMdvMutatorsRejectedOnAdded() { + assertThatThrownBy( + () -> TrackingStruct.added(42L).deletedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set deleted positions on ADDED entry"); + + assertThatThrownBy( + () -> TrackingStruct.added(42L).replacedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set replaced positions on ADDED entry"); + } + + @Test + void testMdvMutatorsRejectedOnReplaced() { + Tracking source = sourceTracking(); + + assertThatThrownBy( + () -> + TrackingStruct.replaced(source, 1L) + .deletedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set deleted positions on REPLACED entry"); + + assertThatThrownBy( + () -> + TrackingStruct.replaced(source, 1L) + .replacedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set replaced positions on REPLACED entry"); + } + + @Test + void testDvSnapshotIdRejectedOnReplaced() { + assertThatThrownBy(() -> TrackingStruct.replaced(sourceTracking(), 1L).dvSnapshotId(123L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set DV snapshot ID on REPLACED entry"); + } + + @Test + void testDvSnapshotIdAndMdvPositionsAreMutuallyExclusive() { + // sourceTracking has dvSnapshotId=43, inherited by existing(source) + assertThatThrownBy( + () -> + TrackingStruct.existing(sourceTracking()) + .deletedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set deleted positions on a data file entry (DV snapshot ID is set)"); + + assertThatThrownBy( + () -> + TrackingStruct.existing(sourceTracking()) + .replacedPositions(ByteBuffer.wrap(new byte[] {1}))) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Cannot set replaced positions on a data file entry (DV snapshot ID is set)"); + + // Setting MDV positions first then dvSnapshotId is also rejected + assertThatThrownBy( + () -> + TrackingStruct.existing(manifestSourceTracking()) + .deletedPositions(ByteBuffer.wrap(new byte[] {1})) + .dvSnapshotId(123L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage( + "Cannot set DV snapshot ID on a manifest entry (deleted/replaced positions are set)"); + } + + @Test + void testBuilderRejectsNullSource() { + assertThatThrownBy(() -> TrackingStruct.existing(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source tracking: null"); + + assertThatThrownBy(() -> TrackingStruct.deleted(null, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source tracking: null"); + + assertThatThrownBy(() -> TrackingStruct.replaced(null, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid source tracking: null"); + } + + @Test + void testSourceBuildersRejectSourceWithoutSequenceNumbers() { + Tracking missingBoth = TrackingStruct.added(42L).build(); + + assertThatThrownBy(() -> TrackingStruct.existing(missingBoth)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: data sequence number is null"); + + assertThatThrownBy(() -> TrackingStruct.deleted(missingBoth, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: data sequence number is null"); + + assertThatThrownBy(() -> TrackingStruct.replaced(missingBoth, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: data sequence number is null"); + + TrackingStruct missingFileSeq = new TrackingStruct(Tracking.schema()); + missingFileSeq.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + missingFileSeq.set(SNAPSHOT_ID_ORDINAL, 42L); + missingFileSeq.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + + assertThatThrownBy(() -> TrackingStruct.existing(missingFileSeq)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: file sequence number is null"); + + assertThatThrownBy(() -> TrackingStruct.deleted(missingFileSeq, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: file sequence number is null"); + + assertThatThrownBy(() -> TrackingStruct.replaced(missingFileSeq, 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid tracking source: file sequence number is null"); + } + + @Test + void testRejectsTransitionsFromTerminalStatus() { + Tracking deletedSource = sourceTrackingWithStatus(EntryStatus.DELETED); + Tracking replacedSource = sourceTrackingWithStatus(EntryStatus.REPLACED); + + assertThatThrownBy(() -> TrackingStruct.existing(deletedSource)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Invalid status transition: DELETED -> EXISTING (DELETED is terminal)"); + + assertThatThrownBy(() -> TrackingStruct.deleted(replacedSource, 1L)) + .isInstanceOf(IllegalStateException.class) + .hasMessage("Invalid status transition: REPLACED -> DELETED (REPLACED is terminal)"); + } + + @Test + void testExistingToExistingIsAllowed() { + Tracking existingSource = sourceTrackingWithStatus(EntryStatus.EXISTING); + + TrackingStruct existing = TrackingStruct.existing(existingSource).build(); + + assertThat(existing.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(existing.snapshotId()).isEqualTo(existingSource.snapshotId()); } @Test @@ -210,38 +478,85 @@ void testProjectedStructLike() { } @Test - void testJavaSerializationRoundTrip() throws IOException, ClassNotFoundException { - TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(42L) - .dataSequenceNumber(10L) - .deletedPositions(new byte[] {1, 2}) - .build(); + void testAddedWithDvSnapshotIdJavaSerializationRoundTrip() + throws IOException, ClassNotFoundException { + TrackingStruct tracking = TrackingStruct.added(42L).dvSnapshotId(42L).build(); TrackingStruct deserialized = TestHelpers.roundTripSerialize(tracking); assertThat(deserialized.status()).isEqualTo(EntryStatus.ADDED); assertThat(deserialized.snapshotId()).isEqualTo(42L); - assertThat(deserialized.dataSequenceNumber()).isEqualTo(10L); - assertThat(deserialized.deletedPositions()).isEqualTo(ByteBuffer.wrap(new byte[] {1, 2})); + assertThat(deserialized.dvSnapshotId()).isEqualTo(42L); + assertThat(deserialized.deletedPositions()).isNull(); + assertThat(deserialized.replacedPositions()).isNull(); } @Test - void testKryoSerializationRoundTrip() throws IOException { + void testExistingWithMdvPositionsJavaSerializationRoundTrip() + throws IOException, ClassNotFoundException { TrackingStruct tracking = - TrackingStruct.builder() - .status(EntryStatus.ADDED) - .snapshotId(42L) - .dataSequenceNumber(10L) - .deletedPositions(new byte[] {1, 2}) + TrackingStruct.existing(manifestSourceTracking()) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .replacedPositions(ByteBuffer.wrap(new byte[] {3, 4})) .build(); + TrackingStruct deserialized = TestHelpers.roundTripSerialize(tracking); + + assertThat(deserialized.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(deserialized.dvSnapshotId()).isNull(); + assertThat(deserialized.deletedPositions()).isEqualTo(ByteBuffer.wrap(new byte[] {1, 2})); + assertThat(deserialized.replacedPositions()).isEqualTo(ByteBuffer.wrap(new byte[] {3, 4})); + } + + @Test + void testAddedWithDvSnapshotIdKryoSerializationRoundTrip() throws IOException { + TrackingStruct tracking = TrackingStruct.added(42L).dvSnapshotId(42L).build(); + TrackingStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(tracking); assertThat(deserialized.status()).isEqualTo(EntryStatus.ADDED); assertThat(deserialized.snapshotId()).isEqualTo(42L); - assertThat(deserialized.dataSequenceNumber()).isEqualTo(10L); + assertThat(deserialized.dvSnapshotId()).isEqualTo(42L); + assertThat(deserialized.deletedPositions()).isNull(); + assertThat(deserialized.replacedPositions()).isNull(); + } + + @Test + void testExistingWithMdvPositionsKryoSerializationRoundTrip() throws IOException { + TrackingStruct tracking = + TrackingStruct.existing(manifestSourceTracking()) + .deletedPositions(ByteBuffer.wrap(new byte[] {1, 2})) + .replacedPositions(ByteBuffer.wrap(new byte[] {3, 4})) + .build(); + + TrackingStruct deserialized = TestHelpers.KryoHelpers.roundTripSerialize(tracking); + + assertThat(deserialized.status()).isEqualTo(EntryStatus.EXISTING); + assertThat(deserialized.dvSnapshotId()).isNull(); assertThat(deserialized.deletedPositions()).isEqualTo(ByteBuffer.wrap(new byte[] {1, 2})); + assertThat(deserialized.replacedPositions()).isEqualTo(ByteBuffer.wrap(new byte[] {3, 4})); + } + + private static TrackingStruct sourceTracking() { + TrackingStruct tracking = new TrackingStruct(Tracking.schema()); + tracking.set(STATUS_ORDINAL, EntryStatus.ADDED.id()); + tracking.set(SNAPSHOT_ID_ORDINAL, 42L); + tracking.set(DATA_SEQUENCE_NUMBER_ORDINAL, 10L); + tracking.set(FILE_SEQUENCE_NUMBER_ORDINAL, 10L); + tracking.set(DV_SNAPSHOT_ID_ORDINAL, 43L); + tracking.set(FIRST_ROW_ID_ORDINAL, 1000L); + return tracking; + } + + private static TrackingStruct sourceTrackingWithStatus(EntryStatus status) { + TrackingStruct tracking = sourceTracking(); + tracking.set(STATUS_ORDINAL, status.id()); + return tracking; + } + + private static TrackingStruct manifestSourceTracking() { + TrackingStruct tracking = sourceTracking(); + tracking.set(DV_SNAPSHOT_ID_ORDINAL, null); + return tracking; } }