Skip to content
Open
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 104 additions & 18 deletions arrow-select/src/interleave.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder};
use arrow_array::cast::AsArray;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::bit_mask::set_bits;
use arrow_buffer::bit_util;
use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer};
use arrow_data::ByteView;
use arrow_data::transform::MutableArrayData;
Expand Down Expand Up @@ -373,13 +375,85 @@ fn interleave_struct(
Ok(Arc::new(struct_array))
}

/// Specialized interleave for list child arrays that are primitive.
/// Directly copies typed value slices and null bit ranges without
/// going through MutableArrayData's function pointer indirection.
fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used to uses MutableArrayData, but it's about 15% slower than this implementation.

interleaved: &Interleave<'_, GenericListArray<O>>,
indices: &[(usize, usize)],
capacity: usize,
) -> ArrayRef {
let child_arrays: Vec<&PrimitiveArray<T>> = interleaved
.arrays
.iter()
.map(|list| list.values().as_primitive::<T>())
.collect();

let has_child_nulls = child_arrays.iter().any(|a| a.null_count() > 0);

// Build values buffer by copying contiguous slices
let mut values: Vec<T::Native> = Vec::with_capacity(capacity);
for &(array, row) in indices {
let o = interleaved.arrays[array].value_offsets();
let start = o[row].as_usize();
let end = o[row + 1].as_usize();
if end > start {
values.extend_from_slice(&child_arrays[array].values()[start..end]);
}
}

// Build null buffer. Pre-allocate with 0x00 (all null), then:
// - Sources with nulls: set_bits ORs in valid bits from source.
Comment thread
mapleFU marked this conversation as resolved.
Outdated
// - Sources without nulls: set the bit range to all 1s directly.
let nulls = if has_child_nulls {
let null_byte_len = bit_util::ceil(capacity, 8);
let mut null_buf = MutableBuffer::new(null_byte_len);
Comment thread
mapleFU marked this conversation as resolved.
Outdated
null_buf.resize(null_byte_len, 0);
Comment thread
mapleFU marked this conversation as resolved.
Outdated

let mut offset_write = 0;
for &(array, row) in indices {
let o = interleaved.arrays[array].value_offsets();
let start = o[row].as_usize();
let end = o[row + 1].as_usize();
let len = end - start;
if len > 0 {
match child_arrays[array].nulls() {
Some(null_buffer) => {
set_bits(
null_buf.as_slice_mut(),
null_buffer.validity(),
offset_write,
null_buffer.offset() + start,
len,
);
}
None => {
// Slow path. For a non-nullable source, set the bit range to all 1s directly.
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is merely happens so uses slow path

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And I don't find set_bits function, though it's easy to add, I just think we can add it separately.

let buf = null_buf.as_slice_mut();
(offset_write..offset_write + len).for_each(|i| bit_util::set_bit(buf, i));
}
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether set_bits works well for 0xFF sequence...

}
}
offset_write += len;
}

let bool_buf = BooleanBuffer::new(null_buf.into(), 0, capacity);
Some(NullBuffer::new(bool_buf))
Comment thread
mapleFU marked this conversation as resolved.
Outdated
} else {
None
};

Arc::new(PrimitiveArray::<T>::new(values.into(), nulls))
}
Comment thread
mapleFU marked this conversation as resolved.
Outdated

fn interleave_list<O: OffsetSizeTrait>(
values: &[&dyn Array],
indices: &[(usize, usize)],
field: &FieldRef,
) -> Result<ArrayRef, ArrowError> {
let interleaved = Interleave::<'_, GenericListArray<O>>::new(values, indices);

// Step 1: compute output offsets and total child capacity
let mut capacity = 0usize;
let mut offsets = Vec::with_capacity(indices.len() + 1);
offsets.push(O::from_usize(0).unwrap());
Expand All @@ -392,29 +466,41 @@ fn interleave_list<O: OffsetSizeTrait>(
);
}

let mut child_indices = Vec::with_capacity(capacity);
for (array, row) in indices {
let list = interleaved.arrays[*array];
let start = list.value_offsets()[*row].as_usize();
let end = list.value_offsets()[*row + 1].as_usize();
child_indices.extend((start..end).map(|i| (*array, i)));
// Step 2: build child values.
macro_rules! list_primitive_helper {
($t:ty) => {
interleave_list_primitive_child::<O, $t>(&interleaved, indices, capacity)
};
}

let child_arrays: Vec<&dyn Array> = interleaved
.arrays
.iter()
.map(|list| list.values().as_ref())
.collect();
let child_values = downcast_primitive! {
// For primitive child types, directly copy typed value slices and null bit
// ranges, avoiding both the intermediate child_indices Vec allocation and
// MutableArrayData's function pointer indirection.
field.data_type() => (list_primitive_helper),
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is just for type which could be copied fastly, for List<List<...>>, still we need some optimizations

_ => {
// For complex child types (nested lists, structs, views, dictionaries, etc.),
// use recursive interleave to benefit from type-specific optimizations.
let mut child_indices = Vec::with_capacity(capacity);
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This keeps the previous code

for (array, row) in indices {
let list = interleaved.arrays[*array];
let start = list.value_offsets()[*row].as_usize();
let end = list.value_offsets()[*row + 1].as_usize();
child_indices.extend((start..end).map(|i| (*array, i)));
}

let interleaved_values = interleave(&child_arrays, &child_indices)?;
let child_arrays: Vec<&dyn Array> = interleaved
.arrays
.iter()
.map(|list| list.values().as_ref())
.collect();
interleave(&child_arrays, &child_indices)?
}
};

let offsets = OffsetBuffer::new(offsets.into());
let list_array = GenericListArray::<O>::new(
field.clone(),
offsets,
interleaved_values,
interleaved.nulls,
);
let list_array =
GenericListArray::<O>::new(field.clone(), offsets, child_values, interleaved.nulls);

Ok(Arc::new(list_array))
}
Expand Down
Loading