-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Core: Add builders for v4 structs #16092
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
35926a9
7ee9241
2da1af6
6363aef
0cf56f6
d453f43
f802f85
37f114a
4cd8c8a
42173ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ | |
| import java.io.Serializable; | ||
| import org.apache.iceberg.avro.SupportsIndexProjection; | ||
| import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.types.Types; | ||
|
|
||
| /** Mutable {@link StructLike} implementation of {@link DeletionVector}. */ | ||
|
|
@@ -49,6 +50,14 @@ private DeletionVectorStruct(DeletionVectorStruct toCopy) { | |
| this.cardinality = toCopy.cardinality; | ||
| } | ||
|
|
||
| private DeletionVectorStruct(String location, long offset, long sizeInBytes, long cardinality) { | ||
| super(BASE_TYPE, BASE_TYPE); | ||
| this.location = location; | ||
| this.offset = offset; | ||
| this.sizeInBytes = sizeInBytes; | ||
| this.cardinality = cardinality; | ||
| } | ||
|
|
||
| @Override | ||
| public String location() { | ||
| return location; | ||
|
|
@@ -115,6 +124,10 @@ protected <T> void internalSet(int pos, T value) { | |
| } | ||
| } | ||
|
|
||
| static Builder builder() { | ||
| return new Builder(); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return MoreObjects.toStringHelper(this) | ||
|
|
@@ -124,4 +137,41 @@ public String toString() { | |
| .add("cardinality", cardinality) | ||
| .toString(); | ||
| } | ||
|
|
||
|
anoopj marked this conversation as resolved.
|
||
| static class Builder { | ||
| private String location = null; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are these all valid values? because we're currently not preventing the builder from creating an instance where e.g. only
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is not a valid value. I was initially on the fence about adding validations because the builders are only used for explicit construction (mainly in tests, and not in the read/write path, which uses But adding validation in build() makes sense. I'll add checks that required fields have been set, along with tests. |
||
| private long offset = -1L; | ||
| private long sizeInBytes = -1L; | ||
| private long cardinality = -1L; | ||
|
|
||
| Builder location(String dvLocation) { | ||
| this.location = dvLocation; | ||
| return this; | ||
| } | ||
|
|
||
| Builder offset(long dvOffset) { | ||
| this.offset = dvOffset; | ||
| return this; | ||
| } | ||
|
|
||
| Builder sizeInBytes(long dvSizeInBytes) { | ||
| this.sizeInBytes = dvSizeInBytes; | ||
| return this; | ||
| } | ||
|
|
||
| Builder cardinality(long dvCardinality) { | ||
| this.cardinality = dvCardinality; | ||
| return this; | ||
| } | ||
|
|
||
| DeletionVectorStruct build() { | ||
| Preconditions.checkArgument(location != null, "Invalid location: null"); | ||
| Preconditions.checkArgument(offset >= 0, "Invalid offset: %s (must be >= 0)", offset); | ||
| Preconditions.checkArgument( | ||
| sizeInBytes >= 0, "Invalid size in bytes: %s (must be >= 0)", sizeInBytes); | ||
| Preconditions.checkArgument( | ||
| cardinality >= 0, "Invalid cardinality: %s (must be >= 0)", cardinality); | ||
| return new DeletionVectorStruct(location, offset, sizeInBytes, cardinality); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,7 @@ | |
| import java.util.Arrays; | ||
| import org.apache.iceberg.avro.SupportsIndexProjection; | ||
| import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.types.Types; | ||
| import org.apache.iceberg.util.ByteBuffers; | ||
|
|
||
|
|
@@ -73,6 +74,32 @@ private ManifestInfoStruct(ManifestInfoStruct toCopy) { | |
| this.dvCardinality = toCopy.dvCardinality; | ||
| } | ||
|
|
||
| private ManifestInfoStruct( | ||
| int addedFilesCount, | ||
| int existingFilesCount, | ||
| int deletedFilesCount, | ||
| int replacedFilesCount, | ||
| long addedRowsCount, | ||
| long existingRowsCount, | ||
| long deletedRowsCount, | ||
| long replacedRowsCount, | ||
| long minSequenceNumber, | ||
| byte[] dv, | ||
| Long dvCardinality) { | ||
| super(BASE_TYPE, BASE_TYPE); | ||
| this.addedFilesCount = addedFilesCount; | ||
| this.existingFilesCount = existingFilesCount; | ||
| this.deletedFilesCount = deletedFilesCount; | ||
| this.replacedFilesCount = replacedFilesCount; | ||
| this.addedRowsCount = addedRowsCount; | ||
| this.existingRowsCount = existingRowsCount; | ||
| this.deletedRowsCount = deletedRowsCount; | ||
| this.replacedRowsCount = replacedRowsCount; | ||
| this.minSequenceNumber = minSequenceNumber; | ||
| this.dv = dv; | ||
| this.dvCardinality = dvCardinality; | ||
| } | ||
|
|
||
| @Override | ||
| public int addedFilesCount() { | ||
| return addedFilesCount; | ||
|
|
@@ -208,6 +235,10 @@ protected <T> void internalSet(int pos, T value) { | |
| } | ||
| } | ||
|
|
||
| static Builder builder() { | ||
| return new Builder(); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return MoreObjects.toStringHelper(this) | ||
|
|
@@ -224,4 +255,126 @@ public String toString() { | |
| .add("dv_cardinality", dvCardinality == null ? "null" : dvCardinality) | ||
| .toString(); | ||
| } | ||
|
|
||
| static class Builder { | ||
| private int addedFilesCount = -1; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as in the other builder. Is -1 a valid value across all of these? If not, then we need to add some validation checks inside the build() method
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. It is not a valid value - added validation |
||
| private int existingFilesCount = -1; | ||
| private int deletedFilesCount = -1; | ||
| private int replacedFilesCount = -1; | ||
| private long addedRowsCount = -1L; | ||
| private long existingRowsCount = -1L; | ||
| private long deletedRowsCount = -1L; | ||
| private long replacedRowsCount = -1L; | ||
| private long minSequenceNumber = -1L; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in other places we default
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok then let's go with -1 as the default here, thanks for confirming |
||
| private byte[] dv = null; | ||
| private Long dvCardinality = null; | ||
|
|
||
| Builder addedFilesCount(int count) { | ||
| this.addedFilesCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder existingFilesCount(int count) { | ||
| this.existingFilesCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder deletedFilesCount(int count) { | ||
| this.deletedFilesCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder replacedFilesCount(int count) { | ||
| this.replacedFilesCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder addedRowsCount(long count) { | ||
| this.addedRowsCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder existingRowsCount(long count) { | ||
| this.existingRowsCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder deletedRowsCount(long count) { | ||
| this.deletedRowsCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder replacedRowsCount(long count) { | ||
| this.replacedRowsCount = count; | ||
| return this; | ||
| } | ||
|
|
||
| Builder minSequenceNumber(long sequenceNumber) { | ||
| this.minSequenceNumber = sequenceNumber; | ||
| return this; | ||
| } | ||
|
|
||
| Builder dv(ByteBuffer buffer) { | ||
|
stevenzwu marked this conversation as resolved.
|
||
| this.dv = buffer != null ? ByteBuffers.toByteArray(buffer) : null; | ||
| return this; | ||
| } | ||
|
|
||
| Builder dv(byte[] buffer) { | ||
| this.dv = buffer; | ||
| return this; | ||
| } | ||
|
|
||
| Builder dvCardinality(Long cardinality) { | ||
| this.dvCardinality = cardinality; | ||
| return this; | ||
| } | ||
|
|
||
| ManifestInfoStruct build() { | ||
| Preconditions.checkArgument( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For builders, we usually fail as early as possible. For this, it means moving these checks into Another good side effect is the caller gets a stack trace with I think the rationale for adding the checks here was that we use
ManifestInfoStruct.builder().build()
This fails with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense. Fixing it in #16408 |
||
| addedFilesCount >= 0, "Invalid added files count: %s (must be >= 0)", addedFilesCount); | ||
| Preconditions.checkArgument( | ||
| existingFilesCount >= 0, | ||
| "Invalid existing files count: %s (must be >= 0)", | ||
| existingFilesCount); | ||
| Preconditions.checkArgument( | ||
| deletedFilesCount >= 0, | ||
| "Invalid deleted files count: %s (must be >= 0)", | ||
| deletedFilesCount); | ||
| Preconditions.checkArgument( | ||
| replacedFilesCount >= 0, | ||
| "Invalid replaced files count: %s (must be >= 0)", | ||
| replacedFilesCount); | ||
| Preconditions.checkArgument( | ||
| addedRowsCount >= 0, "Invalid added rows count: %s (must be >= 0)", addedRowsCount); | ||
| Preconditions.checkArgument( | ||
| existingRowsCount >= 0, | ||
| "Invalid existing rows count: %s (must be >= 0)", | ||
| existingRowsCount); | ||
| Preconditions.checkArgument( | ||
| deletedRowsCount >= 0, "Invalid deleted rows count: %s (must be >= 0)", deletedRowsCount); | ||
| Preconditions.checkArgument( | ||
| replacedRowsCount >= 0, | ||
| "Invalid replaced rows count: %s (must be >= 0)", | ||
| replacedRowsCount); | ||
| Preconditions.checkArgument( | ||
| minSequenceNumber >= 0, | ||
| "Invalid min sequence number: %s (must be >= 0)", | ||
| minSequenceNumber); | ||
| Preconditions.checkArgument( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A check that the dvCardinality is positive?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
| (dv == null) == (dvCardinality == null), | ||
| "Invalid DV and cardinality: must both be null or non-null"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the kind of consistency check that belongs in the builder. Is it okay to set cardinality to 0? Maybe it doesn't matter because I think we're removing the MDV cardinality.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a validation on dvCardinality for now. |
||
| return new ManifestInfoStruct( | ||
| addedFilesCount, | ||
| existingFilesCount, | ||
| deletedFilesCount, | ||
| replacedFilesCount, | ||
| addedRowsCount, | ||
| existingRowsCount, | ||
| deletedRowsCount, | ||
| replacedRowsCount, | ||
| minSequenceNumber, | ||
| dv, | ||
| dvCardinality); | ||
|
anoopj marked this conversation as resolved.
|
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -59,10 +59,6 @@ class TrackingStruct extends SupportsIndexProjection implements Tracking, Serial | |
| super(BASE_TYPE, type); | ||
| } | ||
|
|
||
| TrackingStruct() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why was this constructor removed? Isn't it needed?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need it for the serialization path. Added back. |
||
| super(BASE_TYPE.fields().size()); | ||
| } | ||
|
|
||
| private TrackingStruct(TrackingStruct toCopy) { | ||
| super(toCopy); | ||
| this.status = toCopy.status; | ||
|
|
@@ -83,6 +79,26 @@ private TrackingStruct(TrackingStruct toCopy) { | |
| this.manifestPos = toCopy.manifestPos; | ||
| } | ||
|
|
||
| private TrackingStruct( | ||
| EntryStatus status, | ||
| Long snapshotId, | ||
| Long dataSequenceNumber, | ||
| Long fileSequenceNumber, | ||
| Long dvSnapshotId, | ||
| Long firstRowId, | ||
| byte[] deletedPositions, | ||
| byte[] replacedPositions) { | ||
| super(BASE_TYPE, BASE_TYPE); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this going to compare
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's correct. Fixed. |
||
| this.status = status; | ||
| this.snapshotId = snapshotId; | ||
| this.dataSequenceNumber = dataSequenceNumber; | ||
| this.fileSequenceNumber = fileSequenceNumber; | ||
| this.dvSnapshotId = dvSnapshotId; | ||
| this.firstRowId = firstRowId; | ||
| this.deletedPositions = deletedPositions; | ||
| this.replacedPositions = replacedPositions; | ||
| } | ||
|
|
||
| void inheritFrom(Tracking manifestTracking) { | ||
| if (manifestTracking != null) { | ||
| if (snapshotId == null) { | ||
|
|
@@ -233,6 +249,10 @@ protected <T> void internalSet(int pos, T value) { | |
| } | ||
| } | ||
|
|
||
| static Builder builder() { | ||
| return new Builder(); | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return MoreObjects.toStringHelper(this) | ||
|
|
@@ -246,4 +266,78 @@ public String toString() { | |
| .add("replaced_positions", replacedPositions == null ? "null" : "(binary)") | ||
| .toString(); | ||
| } | ||
|
|
||
| static class Builder { | ||
| private EntryStatus status = null; | ||
| private Long snapshotId = null; | ||
| private Long dataSequenceNumber = null; | ||
| private Long fileSequenceNumber = null; | ||
| private Long dvSnapshotId = null; | ||
| private Long firstRowId = null; | ||
|
nastra marked this conversation as resolved.
|
||
| private byte[] deletedPositions = null; | ||
| private byte[] replacedPositions = null; | ||
|
|
||
| Builder status(EntryStatus entryStatus) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Entry status has a specific lifecycle: when files are written, they are I think that it makes more sense to have the builder handle these cases than to allow creating possibly misconfigured Builder added(long snapshotId); // creates a new Tracking for an addition
Builder existing(Tracking); // creates Tracking for an existing file based on Tracking read from a manifest
Builder deleted(Tracking, long snapshotId); // creates Tracking to delete a file
Passing the existing tracking in makes it so the builder can keep values that must be preserved. And this approach allows the builder to use the initial intent (added/existing/deleted) to perform better validation. For instance, you can't call I should also note that a builder may not be the right way to create
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just wrote this elsewhere and I think it is a good addition to understand the why for my comment above: We don't just want to validate that a single
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Restructured along those lines. The common |
||
| this.status = entryStatus; | ||
| return this; | ||
| } | ||
|
|
||
| Builder snapshotId(Long id) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. Made it |
||
| this.snapshotId = id; | ||
| return this; | ||
| } | ||
|
|
||
| Builder dataSequenceNumber(Long sequenceNumber) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I already commented on another review: #16100 (comment) To summarize, I don't think that this method or
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed both |
||
| this.dataSequenceNumber = sequenceNumber; | ||
| return this; | ||
| } | ||
|
|
||
| Builder fileSequenceNumber(Long sequenceNumber) { | ||
| this.fileSequenceNumber = sequenceNumber; | ||
| return this; | ||
| } | ||
|
|
||
| Builder dvSnapshotId(Long id) { | ||
| this.dvSnapshotId = id; | ||
| return this; | ||
| } | ||
|
|
||
| Builder firstRowId(Long rowId) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is another one that should never be set through the builder. It can only be set through inheritance.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed. |
||
| this.firstRowId = rowId; | ||
| return this; | ||
| } | ||
|
|
||
| Builder deletedPositions(ByteBuffer positions) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we have an overloaded method that takes byte[] instead of the ByteBuffer? Seems like all calling sites wrap a byte[] into a ByteBuffer just to extract it here again
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense. Done |
||
| this.deletedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; | ||
| return this; | ||
| } | ||
|
|
||
| Builder deletedPositions(byte[] positions) { | ||
| this.deletedPositions = positions; | ||
| return this; | ||
| } | ||
|
|
||
| Builder replacedPositions(ByteBuffer positions) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems to me that this is premature. We should know whether we are going to use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dropped the |
||
| this.replacedPositions = positions != null ? ByteBuffers.toByteArray(positions) : null; | ||
| return this; | ||
| } | ||
|
|
||
| Builder replacedPositions(byte[] positions) { | ||
| this.replacedPositions = positions; | ||
| return this; | ||
| } | ||
|
|
||
| TrackingStruct build() { | ||
| Preconditions.checkArgument(status != null, "Invalid status: null"); | ||
| return new TrackingStruct( | ||
| status, | ||
| snapshotId, | ||
| dataSequenceNumber, | ||
| fileSequenceNumber, | ||
| dvSnapshotId, | ||
| firstRowId, | ||
| deletedPositions, | ||
| replacedPositions); | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.