diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 59b9f6b3b7b5..110dfc9855bb 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -162,7 +162,13 @@ fn bit_width(data_type: &DataType, i: usize) -> Result { "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented." ))); } - (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize, + (DataType::FixedSizeBinary(num_bytes), 1) => { + TryInto::::try_into(*num_bytes).map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "cannot determine bit_width for FixedSizeBinary({num_bytes})" + )) + })? * u8::BITS as usize + } (DataType::FixedSizeList(f, num_elems), 1) => { let child_bit_width = bit_width(f.data_type(), 1)?; child_bit_width * (*num_elems as usize) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 34d5cb6d3aba..7b208b7af0be 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -134,6 +134,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff MutableBuffer::new(capacity * mem::size_of::()), ], DataType::FixedSizeBinary(size) => { + if *size < 0 { + panic!("cannot construct buffers from FixedSizeBinary({size})"); + } [MutableBuffer::new(capacity * *size as usize), empty_buffer] } DataType::Dictionary(k, _) => [ @@ -634,6 +637,10 @@ impl ArrayData { } /// Returns a new [`ArrayData`] valid for `data_type` containing `len` null values + /// + /// # Panics + /// This function panics if: + /// * the datatype `data_type` has incorrect layout pub fn new_null(data_type: &DataType, len: usize) -> Self { let bit_len = bit_util::ceil(len, 8); let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len)); @@ -650,7 +657,12 @@ impl ArrayData { DataType::LargeBinary | DataType::LargeUtf8 => { (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true) } - DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true), + DataType::FixedSizeBinary(i) => { + if *i < 0 { + panic!("cannot construct null data from FixedSizeBinary({i})"); + } + (vec![zeroed(*i as usize * len)], vec![], true) + } DataType::List(f) | DataType::Map(f, _) => ( vec![zeroed((len + 1) * 4)], vec![ArrayData::new_empty(f.data_type())], @@ -2262,6 +2274,22 @@ impl From for ArrayDataBuilder { } } +/// Get byte width of FixedSizeBinary size +/// # Panics: +/// - Panics if the `data_type` is not FixedSizeBinary +/// - Panics if byte width is negative +pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize { + match data_type { + DataType::FixedSizeBinary(i) => { + if *i < 0 { + panic!("cannot compare FixedSizeBinary({})", *i); + } + *i as usize + } + _ => unreachable!(), + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow-data/src/equal/fixed_binary.rs b/arrow-data/src/equal/fixed_binary.rs index 0778d77e2fdd..8d29a2684db8 100644 --- a/arrow-data/src/equal/fixed_binary.rs +++ b/arrow-data/src/equal/fixed_binary.rs @@ -17,9 +17,8 @@ use crate::bit_iterator::BitSliceIterator; use crate::contains_nulls; -use crate::data::ArrayData; +use crate::data::{ArrayData, get_fixed_size_binary_width}; use crate::equal::primitive::NULL_SLICES_SELECTIVITY_THRESHOLD; -use arrow_schema::DataType; use super::utils::equal_len; @@ -30,10 +29,7 @@ pub(super) fn fixed_binary_equal( rhs_start: usize, len: usize, ) -> bool { - let size = match lhs.data_type() { - DataType::FixedSizeBinary(i) => *i as usize, - _ => unreachable!(), - }; + let size = get_fixed_size_binary_width(lhs.data_type()); let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..]; let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..]; diff --git a/arrow-data/src/transform/fixed_binary.rs b/arrow-data/src/transform/fixed_binary.rs index 626ecbee0261..7f4bde3de1dd 100644 --- a/arrow-data/src/transform/fixed_binary.rs +++ b/arrow-data/src/transform/fixed_binary.rs @@ -17,14 +17,10 @@ use super::{_MutableArrayData, Extend}; use crate::ArrayData; -use arrow_schema::DataType; +use crate::data::get_fixed_size_binary_width; pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> { - let size = match array.data_type() { - DataType::FixedSizeBinary(i) => *i as usize, - _ => unreachable!(), - }; - + let size = get_fixed_size_binary_width(array.data_type()); let values = &array.buffers()[0].as_slice()[array.offset() * size..]; Box::new( move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| { @@ -35,10 +31,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> { } pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { - let size = match mutable.data_type { - DataType::FixedSizeBinary(i) => i as usize, - _ => unreachable!(), - }; + let size = get_fixed_size_binary_width(&mutable.data_type); let values_buffer = &mut mutable.buffer1; values_buffer.extend_zeros(len * size); diff --git a/arrow-json/src/reader/binary_array.rs b/arrow-json/src/reader/binary_array.rs index b1b736e83895..7f0d5219e290 100644 --- a/arrow-json/src/reader/binary_array.rs +++ b/arrow-json/src/reader/binary_array.rs @@ -133,7 +133,10 @@ impl ArrayDecoder for FixedSizeBinaryArrayDecoder { fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result { let mut builder = FixedSizeBinaryBuilder::with_capacity(pos.len(), self.len); // Preallocate for the decoded byte width (FixedSizeBinary len), not the hex string length. - let mut scratch = Vec::with_capacity(self.len as usize); + let capacity: usize = self.len.try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", self.len)) + })?; + let mut scratch = Vec::with_capacity(capacity); for p in pos { match tape.get(*p) { diff --git a/arrow-row/src/fixed.rs b/arrow-row/src/fixed.rs index 493e674018ab..422c0c1a32c8 100644 --- a/arrow-row/src/fixed.rs +++ b/arrow-row/src/fixed.rs @@ -475,6 +475,9 @@ where } /// Decodes a `FixedLengthBinary` from rows +/// +/// # Panics: +/// Panics if `size` is negative pub fn decode_fixed_size_binary( rows: &mut [&[u8]], size: i32, @@ -482,6 +485,9 @@ pub fn decode_fixed_size_binary( ) -> FixedSizeBinaryArray { let len = rows.len(); + if size < 0 { + panic!("cannot decode FixedSizeBinary({size})"); + } let mut values = MutableBuffer::new(size as usize * rows.len()); let (null_count, nulls) = decode_nulls(rows); diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 9349635151aa..721bbda11a09 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -375,6 +375,12 @@ impl<'a> Parser<'a> { fn parse_fixed_size_binary(&mut self) -> ArrowResult { self.expect_token(Token::LParen)?; let length = self.parse_i32("FixedSizeBinary")?; + if length < 0 { + return Err(make_error( + self.val, + &format!("FixedSizeBinary length must be non-negative, got {length}"), + )); + } self.expect_token(Token::RParen)?; Ok(DataType::FixedSizeBinary(length)) } @@ -997,7 +1003,6 @@ mod test { DataType::BinaryView, DataType::FixedSizeBinary(0), DataType::FixedSizeBinary(1234), - DataType::FixedSizeBinary(-432), DataType::LargeBinary, DataType::Utf8, DataType::Utf8View, @@ -1312,7 +1317,6 @@ mod test { ("BinaryView", BinaryView), ("FixedSizeBinary(0)", FixedSizeBinary(0)), ("FixedSizeBinary(1234)", FixedSizeBinary(1234)), - ("FixedSizeBinary(-432)", FixedSizeBinary(-432)), ("LargeBinary", LargeBinary), ("Utf8", Utf8), ("Utf8View", Utf8View), @@ -1437,6 +1441,11 @@ mod test { "FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted", ), + // can't have negative width + ( + "FixedSizeBinary(-1), ", + "FixedSizeBinary length must be non-negative, got -1", + ), // can't have negative precision ( "Decimal32(-3, 5)", diff --git a/arrow-string/src/concat_elements.rs b/arrow-string/src/concat_elements.rs index cd4676d28752..ee0dc3539983 100644 --- a/arrow-string/src/concat_elements.rs +++ b/arrow-string/src/concat_elements.rs @@ -188,8 +188,18 @@ pub fn concat_elements_fixed_size_binary( ))); } - let left_size = left.value_length() as usize; - let right_size = right.value_length() as usize; + let left_size: usize = left.value_length().try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "Invalid size of FixedSizeBinaryArray({})", + left.value_length() + )) + })?; + let right_size: usize = right.value_length().try_into().map_err(|_| { + ArrowError::InvalidArgumentError(format!( + "Invalid size of FixedSizeBinaryArray({})", + right.value_length() + )) + })?; let output_size = left_size + right_size; // Pre-compute combined null bitmap so the per-row NULL check is efficient diff --git a/arrow/src/util/data_gen.rs b/arrow/src/util/data_gen.rs index a5a0647aa877..481fb9e3a42d 100644 --- a/arrow/src/util/data_gen.rs +++ b/arrow/src/util/data_gen.rs @@ -159,7 +159,12 @@ pub fn create_random_array( )), Binary => Arc::new(create_binary_array::(size, null_density)), LargeBinary => Arc::new(create_binary_array::(size, null_density)), - FixedSizeBinary(len) => Arc::new(create_fsb_array(size, null_density, *len as usize)), + FixedSizeBinary(len) => { + let len = TryInto::::try_into(*len).map_err(|_| { + ArrowError::InvalidArgumentError(format!("cannot use FixedSizeBinary({len})")) + })?; + Arc::new(create_fsb_array(size, null_density, len)) + } BinaryView => Arc::new( create_string_view_array_with_len(size, null_density, 4, false).to_binary_view(), ),