-
Notifications
You must be signed in to change notification settings - Fork 1.2k
perf(interleave): Optimize list interleave_list when child is primitive #10025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
64ab2be
e98e758
99761a0
161d29f
f08e703
270524b
03a5c15
63c76e4
2a619c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,8 @@ use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder}; | |
| use arrow_array::cast::AsArray; | ||
| use arrow_array::types::*; | ||
| use arrow_array::*; | ||
| use arrow_buffer::bit_mask::set_bits; | ||
| use arrow_buffer::bit_util; | ||
| use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer}; | ||
| use arrow_data::ByteView; | ||
| use arrow_data::transform::MutableArrayData; | ||
|
|
@@ -373,13 +375,85 @@ fn interleave_struct( | |
| Ok(Arc::new(struct_array)) | ||
| } | ||
|
|
||
| /// Specialized interleave for list child arrays that are primitive. | ||
| /// Directly copies typed value slices and null bit ranges without | ||
| /// going through MutableArrayData's function pointer indirection. | ||
| fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>( | ||
| interleaved: &Interleave<'_, GenericListArray<O>>, | ||
| indices: &[(usize, usize)], | ||
| capacity: usize, | ||
| ) -> ArrayRef { | ||
| let child_arrays: Vec<&PrimitiveArray<T>> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_primitive::<T>()) | ||
| .collect(); | ||
|
|
||
| let has_child_nulls = child_arrays.iter().any(|a| a.null_count() > 0); | ||
|
|
||
| // Build values buffer by copying contiguous slices | ||
| let mut values: Vec<T::Native> = Vec::with_capacity(capacity); | ||
| for &(array, row) in indices { | ||
| let o = interleaved.arrays[array].value_offsets(); | ||
| let start = o[row].as_usize(); | ||
| let end = o[row + 1].as_usize(); | ||
| if end > start { | ||
| values.extend_from_slice(&child_arrays[array].values()[start..end]); | ||
| } | ||
| } | ||
|
|
||
| // Build null buffer. Pre-allocate with 0x00 (all null), then: | ||
| // - Sources with nulls: set_bits ORs in valid bits from source. | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
| // - Sources without nulls: set the bit range to all 1s directly. | ||
| let nulls = if has_child_nulls { | ||
| let null_byte_len = bit_util::ceil(capacity, 8); | ||
| let mut null_buf = MutableBuffer::new(null_byte_len); | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
| null_buf.resize(null_byte_len, 0); | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
|
|
||
| let mut offset_write = 0; | ||
| for &(array, row) in indices { | ||
| let o = interleaved.arrays[array].value_offsets(); | ||
| let start = o[row].as_usize(); | ||
| let end = o[row + 1].as_usize(); | ||
| let len = end - start; | ||
| if len > 0 { | ||
| match child_arrays[array].nulls() { | ||
| Some(null_buffer) => { | ||
| set_bits( | ||
| null_buf.as_slice_mut(), | ||
| null_buffer.validity(), | ||
| offset_write, | ||
| null_buffer.offset() + start, | ||
| len, | ||
| ); | ||
| } | ||
| None => { | ||
| // Slow path. For a non-nullable source, set the bit range to all 1s directly. | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this rarely happens, so it uses the slow path.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And I don't find |
||
| let buf = null_buf.as_slice_mut(); | ||
| (offset_write..offset_write + len).for_each(|i| bit_util::set_bit(buf, i)); | ||
| } | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't know whether set_bits works well for a 0xFF sequence... |
||
| } | ||
| } | ||
| offset_write += len; | ||
| } | ||
|
|
||
| let bool_buf = BooleanBuffer::new(null_buf.into(), 0, capacity); | ||
| Some(NullBuffer::new(bool_buf)) | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
| } else { | ||
| None | ||
| }; | ||
|
|
||
| Arc::new(PrimitiveArray::<T>::new(values.into(), nulls)) | ||
| } | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
|
|
||
| fn interleave_list<O: OffsetSizeTrait>( | ||
| values: &[&dyn Array], | ||
| indices: &[(usize, usize)], | ||
| field: &FieldRef, | ||
| ) -> Result<ArrayRef, ArrowError> { | ||
| let interleaved = Interleave::<'_, GenericListArray<O>>::new(values, indices); | ||
|
|
||
| // Step 1: compute output offsets and total child capacity | ||
| let mut capacity = 0usize; | ||
| let mut offsets = Vec::with_capacity(indices.len() + 1); | ||
| offsets.push(O::from_usize(0).unwrap()); | ||
|
|
@@ -392,29 +466,41 @@ fn interleave_list<O: OffsetSizeTrait>( | |
| ); | ||
| } | ||
|
|
||
| let mut child_indices = Vec::with_capacity(capacity); | ||
| for (array, row) in indices { | ||
| let list = interleaved.arrays[*array]; | ||
| let start = list.value_offsets()[*row].as_usize(); | ||
| let end = list.value_offsets()[*row + 1].as_usize(); | ||
| child_indices.extend((start..end).map(|i| (*array, i))); | ||
| // Step 2: build child values. | ||
| macro_rules! list_primitive_helper { | ||
| ($t:ty) => { | ||
| interleave_list_primitive_child::<O, $t>(&interleaved, indices, capacity) | ||
| }; | ||
| } | ||
|
|
||
| let child_arrays: Vec<&dyn Array> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_ref()) | ||
| .collect(); | ||
| let child_values = downcast_primitive! { | ||
| // For primitive child types, directly copy typed value slices and null bit | ||
| // ranges, avoiding both the intermediate child_indices Vec allocation and | ||
| // MutableArrayData's function pointer indirection. | ||
| field.data_type() => (list_primitive_helper), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this is just for types that can be copied quickly, for |
||
| _ => { | ||
| // For complex child types (nested lists, structs, views, dictionaries, etc.), | ||
| // use recursive interleave to benefit from type-specific optimizations. | ||
| let mut child_indices = Vec::with_capacity(capacity); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This keeps the previous code |
||
| for (array, row) in indices { | ||
| let list = interleaved.arrays[*array]; | ||
| let start = list.value_offsets()[*row].as_usize(); | ||
| let end = list.value_offsets()[*row + 1].as_usize(); | ||
| child_indices.extend((start..end).map(|i| (*array, i))); | ||
| } | ||
|
|
||
| let interleaved_values = interleave(&child_arrays, &child_indices)?; | ||
| let child_arrays: Vec<&dyn Array> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_ref()) | ||
| .collect(); | ||
| interleave(&child_arrays, &child_indices)? | ||
| } | ||
| }; | ||
|
|
||
| let offsets = OffsetBuffer::new(offsets.into()); | ||
| let list_array = GenericListArray::<O>::new( | ||
| field.clone(), | ||
| offsets, | ||
| interleaved_values, | ||
| interleaved.nulls, | ||
| ); | ||
| let list_array = | ||
| GenericListArray::<O>::new(field.clone(), offsets, child_values, interleaved.nulls); | ||
|
|
||
| Ok(Arc::new(list_array)) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used to use
MutableArrayData, but it's about 15% slower than this implementation.