Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion arrow-array/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,13 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)));
}
(DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
(DataType::FixedSizeBinary(num_bytes), 1) => {
TryInto::<usize>::try_into(*num_bytes).map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"cannot determine bit_width for FixedSizeBinary({num_bytes})"
))
})? * u8::BITS as usize
}
(DataType::FixedSizeList(f, num_elems), 1) => {
let child_bit_width = bit_width(f.data_type(), 1)?;
child_bit_width * (*num_elems as usize)
Expand Down
30 changes: 29 additions & 1 deletion arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
MutableBuffer::new(capacity * mem::size_of::<i64>()),
],
DataType::FixedSizeBinary(size) => {
if *size < 0 {
panic!("cannot construct buffers from FixedSizeBinary({size})");
}
[MutableBuffer::new(capacity * *size as usize), empty_buffer]
}
DataType::Dictionary(k, _) => [
Expand Down Expand Up @@ -634,6 +637,10 @@ impl ArrayData {
}

/// Returns a new [`ArrayData`] valid for `data_type` containing `len` null values
///
/// # Panics
/// This function panics if:
/// * the datatype `data_type` has incorrect layout
pub fn new_null(data_type: &DataType, len: usize) -> Self {
let bit_len = bit_util::ceil(len, 8);
let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));
Expand All @@ -650,7 +657,12 @@ impl ArrayData {
DataType::LargeBinary | DataType::LargeUtf8 => {
(vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
}
DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
DataType::FixedSizeBinary(i) => {
if *i < 0 {
panic!("cannot construct null data from FixedSizeBinary({i})");
}
(vec![zeroed(*i as usize * len)], vec![], true)
}
DataType::List(f) | DataType::Map(f, _) => (
vec![zeroed((len + 1) * 4)],
vec![ArrayData::new_empty(f.data_type())],
Expand Down Expand Up @@ -2262,6 +2274,22 @@ impl From<ArrayData> for ArrayDataBuilder {
}
}

/// Get byte width of FixedSizeBinary size
/// # Panics:
/// - Panics if the `data_type` is not FixedSizeBinary
/// - Panics if byte width is negative
pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize {
match data_type {
DataType::FixedSizeBinary(i) => {
if *i < 0 {
panic!("cannot compare FixedSizeBinary({})", *i);
}
*i as usize
}
_ => unreachable!(),
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
8 changes: 2 additions & 6 deletions arrow-data/src/equal/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

use crate::bit_iterator::BitSliceIterator;
use crate::contains_nulls;
use crate::data::ArrayData;
use crate::data::{ArrayData, get_fixed_size_binary_width};
use crate::equal::primitive::NULL_SLICES_SELECTIVITY_THRESHOLD;
use arrow_schema::DataType;

use super::utils::equal_len;

Expand All @@ -30,10 +29,7 @@ pub(super) fn fixed_binary_equal(
rhs_start: usize,
len: usize,
) -> bool {
let size = match lhs.data_type() {
DataType::FixedSizeBinary(i) => *i as usize,
_ => unreachable!(),
};
let size = get_fixed_size_binary_width(lhs.data_type());

let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..];
let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..];
Expand Down
13 changes: 3 additions & 10 deletions arrow-data/src/transform/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@

use super::{_MutableArrayData, Extend};
use crate::ArrayData;
use arrow_schema::DataType;
use crate::data::get_fixed_size_binary_width;

pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
let size = match array.data_type() {
DataType::FixedSizeBinary(i) => *i as usize,
_ => unreachable!(),
};

let size = get_fixed_size_binary_width(array.data_type());
let values = &array.buffers()[0].as_slice()[array.offset() * size..];
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
Expand All @@ -35,10 +31,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
let size = match mutable.data_type {
DataType::FixedSizeBinary(i) => i as usize,
_ => unreachable!(),
};
let size = get_fixed_size_binary_width(&mutable.data_type);

let values_buffer = &mut mutable.buffer1;
values_buffer.extend_zeros(len * size);
Expand Down
5 changes: 4 additions & 1 deletion arrow-json/src/reader/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ impl ArrayDecoder for FixedSizeBinaryArrayDecoder {
fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayRef, ArrowError> {
let mut builder = FixedSizeBinaryBuilder::with_capacity(pos.len(), self.len);
// Preallocate for the decoded byte width (FixedSizeBinary len), not the hex string length.
let mut scratch = Vec::with_capacity(self.len as usize);
let capacity: usize = self.len.try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", self.len))
})?;
let mut scratch = Vec::with_capacity(capacity);

for p in pos {
match tape.get(*p) {
Expand Down
6 changes: 6 additions & 0 deletions arrow-row/src/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,19 @@ where
}

/// Decodes a `FixedLengthBinary` from rows
///
/// # Panics:
/// Panics if `size` is negative
pub fn decode_fixed_size_binary(
rows: &mut [&[u8]],
size: i32,
options: SortOptions,
) -> FixedSizeBinaryArray {
let len = rows.len();

if size < 0 {
panic!("cannot decode FixedSizeBinary({size})");
}
let mut values = MutableBuffer::new(size as usize * rows.len());
let (null_count, nulls) = decode_nulls(rows);

Expand Down
13 changes: 11 additions & 2 deletions arrow-schema/src/datatype_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,12 @@ impl<'a> Parser<'a> {
fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let length = self.parse_i32("FixedSizeBinary")?;
if length < 0 {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is FixedSizeBinary(0) allowed? I think this check only verifies >= 0 🤔

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format doesn't tell anything about it. The C++ implementation allows zero-width buffer. Fixed an error message

return Err(make_error(
self.val,
&format!("FixedSizeBinary length must be non-negative, got {length}"),
));
}
self.expect_token(Token::RParen)?;
Ok(DataType::FixedSizeBinary(length))
}
Expand Down Expand Up @@ -997,7 +1003,6 @@ mod test {
DataType::BinaryView,
DataType::FixedSizeBinary(0),
DataType::FixedSizeBinary(1234),
DataType::FixedSizeBinary(-432),
DataType::LargeBinary,
DataType::Utf8,
DataType::Utf8View,
Expand Down Expand Up @@ -1312,7 +1317,6 @@ mod test {
("BinaryView", BinaryView),
("FixedSizeBinary(0)", FixedSizeBinary(0)),
("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
("FixedSizeBinary(-432)", FixedSizeBinary(-432)),
("LargeBinary", LargeBinary),
("Utf8", Utf8),
("Utf8View", Utf8View),
Expand Down Expand Up @@ -1437,6 +1441,11 @@ mod test {
"FixedSizeBinary(4000000000), ",
"Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
),
// can't have negative width
(
"FixedSizeBinary(-1), ",
"FixedSizeBinary length must be non-negative, got -1",
),
// can't have negative precision
(
"Decimal32(-3, 5)",
Expand Down
14 changes: 12 additions & 2 deletions arrow-string/src/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,18 @@ pub fn concat_elements_fixed_size_binary(
)));
}

let left_size = left.value_length() as usize;
let right_size = right.value_length() as usize;
let left_size: usize = left.value_length().try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Invalid size of FixedSizeBinaryArray({})",
left.value_length()
))
})?;
let right_size: usize = right.value_length().try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Invalid size of FixedSizeBinaryArray({})",
right.value_length()
))
})?;
let output_size = left_size + right_size;

// Pre-compute combined null bitmap so the per-row NULL check is efficient
Expand Down
7 changes: 6 additions & 1 deletion arrow/src/util/data_gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,12 @@ pub fn create_random_array(
)),
Binary => Arc::new(create_binary_array::<i32>(size, null_density)),
LargeBinary => Arc::new(create_binary_array::<i64>(size, null_density)),
FixedSizeBinary(len) => Arc::new(create_fsb_array(size, null_density, *len as usize)),
FixedSizeBinary(len) => {
let len = TryInto::<usize>::try_into(*len).map_err(|_| {
ArrowError::InvalidArgumentError(format!("cannot use FixedSizeBinary({len})"))
})?;
Arc::new(create_fsb_array(size, null_density, len))
}
BinaryView => Arc::new(
create_string_view_array_with_len(size, null_density, 4, false).to_binary_view(),
),
Expand Down
Loading