diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index e6122f062c38..a6a33eafd4d9 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -52,6 +52,31 @@ pub(crate) fn int_size(v: usize) -> OffsetSizeBytes { } } +const ONE_TOP_LEVEL_VALUE_MSG: &str = + "VariantBuilder already contains a top-level variant value; only one is allowed"; +const EMPTY_BUILDER_MSG: &str = + "VariantBuilder is empty; append a top-level value before calling finish()"; + +// Error/panic construction is kept out-of-line and cold so the per-call guards on the +// builder hot paths stay small enough to inline as a single predictable branch. +#[cold] +#[inline(never)] +fn top_level_value_panic() -> ! { + panic!("{ONE_TOP_LEVEL_VALUE_MSG}"); +} + +#[cold] +#[inline(never)] +fn top_level_value_error() -> ArrowError { + ArrowError::InvalidArgumentError(ONE_TOP_LEVEL_VALUE_MSG.into()) +} + +#[cold] +#[inline(never)] +fn empty_builder_error() -> ArrowError { + ArrowError::InvalidArgumentError(EMPTY_BUILDER_MSG.into()) +} + /// Wrapper around a `Vec` that provides methods for appending /// primitive values, variant types, and metadata. /// @@ -792,30 +817,85 @@ impl VariantBuilder { self.metadata_builder.upsert_field_name(field_name); } + /// Returns true if a top-level variant value has already been written to this builder. + /// + /// A [`VariantBuilder`] holds exactly one top-level variant value. Any committed top-level + /// value leaves bytes in the value buffer; a child builder dropped without `finish()` has + /// its bytes rolled back by [`ParentState`], so the offset is a faithful indicator. + #[inline] + fn has_top_level_value(&self) -> bool { + self.value_builder.offset() != 0 + } + + #[inline] + fn ensure_no_top_level_value(&self) { + if self.has_top_level_value() { + top_level_value_panic(); + } + } + + #[inline] + fn check_no_top_level_value(&self) -> Result<(), ArrowError> { + if self.has_top_level_value() { + return Err(top_level_value_error()); + } + Ok(()) + } + /// Create an [`ListBuilder`] for creating [`Variant::List`] values. /// /// See the examples on [`VariantBuilder`] for usage. + /// + /// # Panics + /// + /// Panics if a top-level variant value has already been written to this builder. For a + /// fallible version, use [`VariantBuilder::try_new_list`]. pub fn new_list(&mut self) -> ListBuilder<'_, ()> { + self.try_new_list().unwrap() + } + + /// Create an [`ListBuilder`] for creating [`Variant::List`] values. + /// + /// Returns an error if a top-level variant value has already been written to this builder. + pub fn try_new_list(&mut self) -> Result, ArrowError> { + self.check_no_top_level_value()?; let parent_state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder); - ListBuilder::new(parent_state, self.validate_unique_fields) + Ok(ListBuilder::new(parent_state, self.validate_unique_fields)) } /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values. /// /// See the examples on [`VariantBuilder`] for usage. + /// + /// # Panics + /// + /// Panics if a top-level variant value has already been written to this builder. For a + /// fallible version, use [`VariantBuilder::try_new_object`]. pub fn new_object(&mut self) -> ObjectBuilder<'_, ()> { + self.try_new_object().unwrap() + } + + /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values. + /// + /// Returns an error if a top-level variant value has already been written to this builder. + pub fn try_new_object(&mut self) -> Result, ArrowError> { + self.check_no_top_level_value()?; let parent_state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder); - ObjectBuilder::new(parent_state, self.validate_unique_fields) + Ok(ObjectBuilder::new( + parent_state, + self.validate_unique_fields, + )) } /// Append a value to the builder. /// /// # Panics /// - /// This method will panic if the variant contains duplicate field names in objects - /// when validation is enabled. For a fallible version, use [`VariantBuilder::try_append_value`] + /// Panics if a top-level variant value has already been written to this builder, or if the + /// variant contains duplicate field names in objects when validation is enabled. For a + /// fallible version, use [`VariantBuilder::try_append_value`]. /// /// # Example /// ``` @@ -825,15 +905,19 @@ impl VariantBuilder { /// builder.append_value(42i8); /// ``` pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { + self.ensure_no_top_level_value(); let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder); ValueBuilder::append_variant(state, value.into()) } /// Append a value to the builder. + /// + /// Returns an error if a top-level variant value has already been written to this builder. pub fn try_append_value<'m, 'd, T: Into>>( &mut self, value: T, ) -> Result<(), ArrowError> { + self.check_no_top_level_value()?; let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder); ValueBuilder::try_append_variant(state, value.into()) } @@ -846,18 +930,51 @@ impl VariantBuilder { /// /// The caller must ensure that the metadata dictionary entries are already built and correct for /// any objects or lists being appended. + /// + /// # Panics + /// + /// Panics if a top-level variant value has already been written to this builder. For a + /// fallible version, use [`VariantBuilder::try_append_value_bytes`]. pub fn append_value_bytes<'m, 'd>(&mut self, value: impl Into>) { + self.try_append_value_bytes(value).unwrap() + } + + /// Tries to append a variant value to the builder by copying raw bytes when possible. + /// + /// This is the fallible version of [`VariantBuilder::append_value_bytes`]. Returns an error + /// if a top-level variant value has already been written to this builder. + pub fn try_append_value_bytes<'m, 'd>( + &mut self, + value: impl Into>, + ) -> Result<(), ArrowError> { + self.check_no_top_level_value()?; let state = ParentState::variant(&mut self.value_builder, &mut self.metadata_builder); ValueBuilder::append_variant_bytes(state, value.into()); + Ok(()) + } + + /// Finish the builder and return the metadata and value buffers. + /// + /// # Panics + /// + /// Panics if no top-level variant value has been appended. For a fallible version, use + /// [`VariantBuilder::try_finish`]. + pub fn finish(self) -> (Vec, Vec) { + self.try_finish().expect(EMPTY_BUILDER_MSG) } /// Finish the builder and return the metadata and value buffers. - pub fn finish(mut self) -> (Vec, Vec) { + /// + /// Returns an error if no top-level variant value has been appended. + pub fn try_finish(mut self) -> Result<(Vec, Vec), ArrowError> { + if !self.has_top_level_value() { + return Err(empty_builder_error()); + } self.metadata_builder.finish(); - ( + Ok(( self.metadata_builder.into_inner(), self.value_builder.into_inner(), - ) + )) } } @@ -915,11 +1032,11 @@ impl VariantBuilderExt for VariantBuilder { } fn try_new_list(&mut self) -> Result>, ArrowError> { - Ok(self.new_list()) + self.try_new_list() } fn try_new_object(&mut self) -> Result>, ArrowError> { - Ok(self.new_object()) + self.try_new_object() } } @@ -1044,14 +1161,9 @@ mod tests { variant2.add_field_name("a"); assert!(!variant2.metadata_builder.is_sorted); - // per the spec, make sure the variant will fail to build if only metadata is provided - let (m, v) = variant2.finish(); - let res = Variant::try_new(&m, &v); - assert!(res.is_err()); - - // since it is not sorted, make sure the metadata says so - let header = VariantMetadata::try_new(&m).unwrap(); - assert!(!header.is_sorted()); + // per the spec, a variant must have a top-level value; try_finish() rejects empty + let err = variant2.try_finish().unwrap_err(); + assert!(err.to_string().contains("empty"), "unexpected error: {err}"); } // write out variant1 and make sure the sorted flag is properly encoded diff --git a/parquet-variant/src/builder/metadata.rs b/parquet-variant/src/builder/metadata.rs index efccc2e4c63e..802d0904c1c8 100644 --- a/parquet-variant/src/builder/metadata.rs +++ b/parquet-variant/src/builder/metadata.rs @@ -268,7 +268,7 @@ impl> Extend for WritableMetadataBuilder { #[cfg(test)] mod test { use crate::{ - ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata, + ParentState, ValueBuilder, Variant, VariantMetadata, builder::{ metadata::{ReadOnlyMetadataBuilder, WritableMetadataBuilder}, object::ObjectBuilder, @@ -359,11 +359,12 @@ mod test { #[test] fn test_read_only_metadata_builder() { // First create some metadata with a few field names - let mut default_builder = VariantBuilder::new(); - default_builder.add_field_name("name"); - default_builder.add_field_name("age"); - default_builder.add_field_name("active"); - let (metadata_bytes, _) = default_builder.finish(); + let mut default_builder = WritableMetadataBuilder::default(); + default_builder.upsert_field_name("name"); + default_builder.upsert_field_name("age"); + default_builder.upsert_field_name("active"); + default_builder.finish(); + let metadata_bytes = default_builder.into_inner(); // Use the metadata to build new variant values let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); @@ -394,9 +395,10 @@ mod test { #[test] fn test_read_only_metadata_builder_fails_on_unknown_field() { // Create metadata with only one field - let mut default_builder = VariantBuilder::new(); - default_builder.add_field_name("known_field"); - let (metadata_bytes, _) = default_builder.finish(); + let mut default_builder = WritableMetadataBuilder::default(); + default_builder.upsert_field_name("known_field"); + default_builder.finish(); + let metadata_bytes = default_builder.into_inner(); // Use the metadata to build new variant values let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); diff --git a/parquet-variant/src/builder/object.rs b/parquet-variant/src/builder/object.rs index 876c2e2d4c7c..670e3f7d768c 100644 --- a/parquet-variant/src/builder/object.rs +++ b/parquet-variant/src/builder/object.rs @@ -431,6 +431,7 @@ impl VariantBuilderExt for ObjectFieldBuilder<'_, '_, ' mod tests { use crate::{ ParentState, ValueBuilder, Variant, VariantBuilder, VariantMetadata, + WritableMetadataBuilder, builder::{metadata::ReadOnlyMetadataBuilder, object::ObjectBuilder}, decoder::VariantBasicType, }; @@ -501,11 +502,12 @@ mod tests { #[test] fn test_read_only_metadata_builder() { // First create some metadata with a few field names - let mut default_builder = VariantBuilder::new(); - default_builder.add_field_name("name"); - default_builder.add_field_name("age"); - default_builder.add_field_name("active"); - let (metadata_bytes, _) = default_builder.finish(); + let mut default_builder = WritableMetadataBuilder::default(); + default_builder.upsert_field_name("name"); + default_builder.upsert_field_name("age"); + default_builder.upsert_field_name("active"); + default_builder.finish(); + let metadata_bytes = default_builder.into_inner(); // Use the metadata to build new variant values let metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); @@ -899,7 +901,8 @@ mod tests { inner_list.finish(); outer_list.finish(); - // Valid object should succeed + // Valid object should succeed (fresh builder — one top-level value per VariantBuilder) + let mut builder = VariantBuilder::new().with_validate_unique_fields(true); let mut list = builder.new_list(); let mut valid_obj = list.new_object(); valid_obj.insert("m", 1); diff --git a/parquet-variant/tests/variant_interop.rs b/parquet-variant/tests/variant_interop.rs index 70b42e1f3c28..4c7c10d51a15 100644 --- a/parquet-variant/tests/variant_interop.rs +++ b/parquet-variant/tests/variant_interop.rs @@ -466,11 +466,12 @@ fn generate_random_value(rng: &mut StdRng, builder: &mut VariantBuilder, max_dep ) .unwrap(); - // timestamp w/o timezone - builder.append_value(data_time.naive_local()); - - // timestamp with timezone - builder.append_value(data_time.naive_utc().and_utc()); + // randomly pick timestamp with or without timezone + if rng.random_bool(0.5) { + builder.append_value(data_time.naive_local()); + } else { + builder.append_value(data_time.naive_utc().and_utc()); + } } 17 => { builder.append_value(Uuid::new_v4());