Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion arrow-array/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,13 @@ fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
"The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
)));
}
(DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
(DataType::FixedSizeBinary(num_bytes), 1) => {
TryInto::<usize>::try_into(*num_bytes).map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"cannot determine bit_width for FixedSizeBinary({num_bytes})"
))
})? * u8::BITS as usize
}
(DataType::FixedSizeList(f, num_elems), 1) => {
let child_bit_width = bit_width(f.data_type(), 1)?;
child_bit_width * (*num_elems as usize)
Expand Down
30 changes: 29 additions & 1 deletion arrow-data/src/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
MutableBuffer::new(capacity * mem::size_of::<i64>()),
],
DataType::FixedSizeBinary(size) => {
if *size < 0 {
panic!("cannot construct buffers from FixedSizeBinary({size})");
}
[MutableBuffer::new(capacity * *size as usize), empty_buffer]
}
DataType::Dictionary(k, _) => [
Expand Down Expand Up @@ -634,6 +637,10 @@ impl ArrayData {
}

/// Returns a new [`ArrayData`] valid for `data_type` containing `len` null values
///
/// # Panics
/// This function panics if:
/// * the datatype `data_type` has incorrect layout
pub fn new_null(data_type: &DataType, len: usize) -> Self {
let bit_len = bit_util::ceil(len, 8);
let zeroed = |len: usize| Buffer::from(MutableBuffer::from_len_zeroed(len));
Expand All @@ -650,7 +657,12 @@ impl ArrayData {
DataType::LargeBinary | DataType::LargeUtf8 => {
(vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true)
}
DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true),
DataType::FixedSizeBinary(i) => {
if *i < 0 {
panic!("cannot construct null data from FixedSizeBinary({i})");
}
(vec![zeroed(*i as usize * len)], vec![], true)
}
DataType::List(f) | DataType::Map(f, _) => (
vec![zeroed((len + 1) * 4)],
vec![ArrayData::new_empty(f.data_type())],
Expand Down Expand Up @@ -2262,6 +2274,22 @@ impl From<ArrayData> for ArrayDataBuilder {
}
}

/// Get byte width of FixedSizeBinary size
/// # Panics:
/// - Panics if the `data_type` is not FixedSizeBinary
/// - Panics if byte width is negative
pub(crate) fn get_fixed_size_binary_width(data_type: &DataType) -> usize {
match data_type {
DataType::FixedSizeBinary(i) => {
if *i < 0 {
panic!("cannot compare FixedSizeBinary({})", *i);
}
*i as usize
}
_ => unreachable!(),
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
8 changes: 2 additions & 6 deletions arrow-data/src/equal/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

use crate::bit_iterator::BitSliceIterator;
use crate::contains_nulls;
use crate::data::ArrayData;
use crate::data::{ArrayData, get_fixed_size_binary_width};
use crate::equal::primitive::NULL_SLICES_SELECTIVITY_THRESHOLD;
use arrow_schema::DataType;

use super::utils::equal_len;

Expand All @@ -30,10 +29,7 @@ pub(super) fn fixed_binary_equal(
rhs_start: usize,
len: usize,
) -> bool {
let size = match lhs.data_type() {
DataType::FixedSizeBinary(i) => *i as usize,
_ => unreachable!(),
};
let size = get_fixed_size_binary_width(lhs.data_type());

let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..];
let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..];
Expand Down
13 changes: 3 additions & 10 deletions arrow-data/src/transform/fixed_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@

use super::{_MutableArrayData, Extend};
use crate::ArrayData;
use arrow_schema::DataType;
use crate::data::get_fixed_size_binary_width;

pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
let size = match array.data_type() {
DataType::FixedSizeBinary(i) => *i as usize,
_ => unreachable!(),
};

let size = get_fixed_size_binary_width(array.data_type());
let values = &array.buffers()[0].as_slice()[array.offset() * size..];
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
Expand All @@ -35,10 +31,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend<'_> {
}

pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) {
let size = match mutable.data_type {
DataType::FixedSizeBinary(i) => i as usize,
_ => unreachable!(),
};
let size = get_fixed_size_binary_width(&mutable.data_type);

let values_buffer = &mut mutable.buffer1;
values_buffer.extend_zeros(len * size);
Expand Down
5 changes: 4 additions & 1 deletion arrow-json/src/reader/binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ impl ArrayDecoder for FixedSizeBinaryArrayDecoder {
fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayRef, ArrowError> {
let mut builder = FixedSizeBinaryBuilder::with_capacity(pos.len(), self.len);
// Preallocate for the decoded byte width (FixedSizeBinary len), not the hex string length.
let mut scratch = Vec::with_capacity(self.len as usize);
let capacity: usize = self.len.try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", self.len))
})?;
let mut scratch = Vec::with_capacity(capacity);

for p in pos {
match tape.get(*p) {
Expand Down
6 changes: 6 additions & 0 deletions arrow-row/src/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,19 @@ where
}

/// Decodes a `FixedLengthBinary` from rows
///
/// # Panics:
/// Panics if `size` is negative
pub fn decode_fixed_size_binary(
rows: &mut [&[u8]],
size: i32,
options: SortOptions,
) -> FixedSizeBinaryArray {
let len = rows.len();

if size < 0 {
panic!("cannot decode FixedSizeBinary({size})");
}
let mut values = MutableBuffer::new(size as usize * rows.len());
let (null_count, nulls) = decode_nulls(rows);

Expand Down
13 changes: 11 additions & 2 deletions arrow-schema/src/datatype_parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,12 @@ impl<'a> Parser<'a> {
fn parse_fixed_size_binary(&mut self) -> ArrowResult<DataType> {
self.expect_token(Token::LParen)?;
let length = self.parse_i32("FixedSizeBinary")?;
if length < 0 {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is FixedSizeBinary(0) allowed? I think this check only verifies >= 0 🤔

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The format doesn't tell anything about it. The C++ implementation allows zero-width buffer. Fixed an error message

return Err(make_error(
self.val,
&format!("FixedSizeBinary length must be positive, got {length}"),
));
}
self.expect_token(Token::RParen)?;
Ok(DataType::FixedSizeBinary(length))
}
Expand Down Expand Up @@ -997,7 +1003,6 @@ mod test {
DataType::BinaryView,
DataType::FixedSizeBinary(0),
DataType::FixedSizeBinary(1234),
DataType::FixedSizeBinary(-432),
DataType::LargeBinary,
DataType::Utf8,
DataType::Utf8View,
Expand Down Expand Up @@ -1312,7 +1317,6 @@ mod test {
("BinaryView", BinaryView),
("FixedSizeBinary(0)", FixedSizeBinary(0)),
("FixedSizeBinary(1234)", FixedSizeBinary(1234)),
("FixedSizeBinary(-432)", FixedSizeBinary(-432)),
("LargeBinary", LargeBinary),
("Utf8", Utf8),
("Utf8View", Utf8View),
Expand Down Expand Up @@ -1437,6 +1441,11 @@ mod test {
"FixedSizeBinary(4000000000), ",
"Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted",
),
// can't have negative width
(
"FixedSizeBinary(-1), ",
"FixedSizeBinary length must be positive, got -1",
),
// can't have negative precision
(
"Decimal32(-3, 5)",
Expand Down
14 changes: 12 additions & 2 deletions arrow-string/src/concat_elements.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,18 @@ pub fn concat_elements_fixed_size_binary(
)));
}

let left_size = left.value_length() as usize;
let right_size = right.value_length() as usize;
let left_size: usize = left.value_length().try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Invalid size of FixedSizeBinaryArray({})",
left.value_length()
))
})?;
let right_size: usize = right.value_length().try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!(
"Invalid size of FixedSizeBinaryArray({})",
right.value_length()
))
})?;
let output_size = left_size + right_size;

// Pre-compute combined null bitmap so the per-row NULL check is efficient
Expand Down
7 changes: 6 additions & 1 deletion arrow/src/util/data_gen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,12 @@ pub fn create_random_array(
)),
Binary => Arc::new(create_binary_array::<i32>(size, null_density)),
LargeBinary => Arc::new(create_binary_array::<i64>(size, null_density)),
FixedSizeBinary(len) => Arc::new(create_fsb_array(size, null_density, *len as usize)),
FixedSizeBinary(len) => {
let len = TryInto::<usize>::try_into(*len).map_err(|_| {
ArrowError::InvalidArgumentError(format!("cannot use FixedSizeBinary({len})"))
})?;
Arc::new(create_fsb_array(size, null_density, len))
}
BinaryView => Arc::new(
create_string_view_array_with_len(size, null_density, 4, false).to_binary_view(),
),
Expand Down
Loading