Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,7 @@ if(ARROW_COMPUTE)
compute/kernels/vector_rank.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_run_end_encode.cc
compute/kernels/vector_search_sorted.cc
compute/kernels/vector_select_k.cc
compute/kernels/vector_sort.cc
compute/kernels/vector_statistics.cc
Expand Down
28 changes: 28 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ using compute::FilterOptions;
using compute::NullPlacement;
using compute::RankOptions;
using compute::RankQuantileOptions;
using compute::SearchSortedOptions;

template <>
struct EnumTraits<FilterOptions::NullSelectionBehavior>
Expand Down Expand Up @@ -96,6 +97,21 @@ struct EnumTraits<NullPlacement>
}
};
// EnumTraits specialization for SearchSortedOptions::Side. The enum type and
// its two enumerators (Left, Right) are handed to BasicEnumTraits; this struct
// adds the printable names for the enum itself and for each of its values.
template <>
struct EnumTraits<SearchSortedOptions::Side>
    : BasicEnumTraits<SearchSortedOptions::Side, SearchSortedOptions::Left,
                      SearchSortedOptions::Right> {
  // Printable name of the enum type.
  static std::string name() { return "SearchSortedOptions::Side"; }

  // Printable name of `value`: "Left" or "Right", and "<INVALID>" for anything
  // that is not one of the declared enumerators.
  static std::string value_name(SearchSortedOptions::Side value) {
    const char* label = "<INVALID>";
    // Deliberately no `default:` label, so every enumerator stays spelled out.
    switch (value) {
      case SearchSortedOptions::Left:
        label = "Left";
        break;
      case SearchSortedOptions::Right:
        label = "Right";
        break;
    }
    return label;
  }
};
template <>
struct EnumTraits<RankOptions::Tiebreaker>
: BasicEnumTraits<RankOptions::Tiebreaker, RankOptions::Min, RankOptions::Max,
RankOptions::First, RankOptions::Dense> {
Expand Down Expand Up @@ -137,6 +153,8 @@ static auto kRunEndEncodeOptionsType = GetFunctionOptionsType<RunEndEncodeOption
// Options-type descriptor for ArraySortOptions, listing its `order` and
// `null_placement` data members under those names.
static auto kArraySortOptionsType = GetFunctionOptionsType<ArraySortOptions>(
DataMember("order", &ArraySortOptions::order),
DataMember("null_placement", &ArraySortOptions::null_placement));
// Options-type descriptor for SearchSortedOptions; `side` is the only data
// member it lists. The SearchSortedOptions constructor hands this descriptor
// to its FunctionOptions base, and RegisterVectorOptions adds it to the
// function registry.
static auto kSearchSortedOptionsType = GetFunctionOptionsType<SearchSortedOptions>(
DataMember("side", &SearchSortedOptions::side));
// Options-type descriptor for SortOptions, listing its `sort_keys` and
// `null_placement` data members under those names.
static auto kSortOptionsType = GetFunctionOptionsType<SortOptions>(
DataMember("sort_keys", &SortOptions::sort_keys),
DataMember("null_placement", &SortOptions::null_placement));
Expand Down Expand Up @@ -196,6 +214,10 @@ ArraySortOptions::ArraySortOptions(SortOrder order, NullPlacement null_placement
null_placement(null_placement) {}
constexpr char ArraySortOptions::kTypeName[];

// Builds search_sorted options for the requested insertion side and passes the
// SearchSortedOptions options-type descriptor to the FunctionOptions base.
SearchSortedOptions::SearchSortedOptions(Side insertion_side)
    : FunctionOptions(internal::kSearchSortedOptionsType), side(insertion_side) {}

// Namespace-scope definition of the static constexpr member declared in the
// class. Pre-C++17 rules need it when the member is odr-used; it is redundant
// (but harmless) once static constexpr members are implicitly inline.
constexpr char SearchSortedOptions::kTypeName[];

SortOptions::SortOptions(std::vector<SortKey> sort_keys, NullPlacement null_placement)
: FunctionOptions(internal::kSortOptionsType),
sort_keys(std::move(sort_keys)),
Expand Down Expand Up @@ -274,6 +296,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kDictionaryEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kRunEndEncodeOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kArraySortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSearchSortedOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
Expand Down Expand Up @@ -315,6 +338,11 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
return result.make_array();
}

// Convenience entry point for the "search_sorted" compute function: packs
// `values` and `needles` (in that order) as the call arguments and forwards
// `options` and `ctx` unchanged to CallFunction.
Result<Datum> SearchSorted(const Datum& values, const Datum& needles,
                           const SearchSortedOptions& options, ExecContext* ctx) {
  const std::vector<Datum> call_args{values, needles};
  return CallFunction("search_sorted", call_args, &options, ctx);
}

Result<Datum> ReplaceWithMask(const Datum& values, const Datum& mask,
const Datum& replacements, ExecContext* ctx) {
return CallFunction("replace_with_mask", {values, mask, replacements}, ctx);
Expand Down
36 changes: 36 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,21 @@ class ARROW_EXPORT ArraySortOptions : public FunctionOptions {
NullPlacement null_placement;
};

/// \brief Options for the search_sorted function.
class ARROW_EXPORT SearchSortedOptions : public FunctionOptions {
 public:
  /// \brief Which insertion point to report for a needle.
  enum Side {
    /// Report the leftmost insertion point.
    Left,
    /// Report the rightmost insertion point.
    Right
  };

  /// \brief Construct options for the given side.
  /// \param[in] side insertion side to use; `Left` when omitted
  explicit SearchSortedOptions(Side side = Left);

  /// Name identifying this options type.
  static constexpr const char kTypeName[] = "SearchSortedOptions";

  /// \brief Return default-constructed options (side is `Left`).
  static SearchSortedOptions Defaults() { return SearchSortedOptions(); }

  /// Whether to return the leftmost or rightmost insertion point.
  Side side;
};

class ARROW_EXPORT SortOptions : public FunctionOptions {
public:
explicit SortOptions(std::vector<SortKey> sort_keys = {},
Expand Down Expand Up @@ -515,6 +530,27 @@ Result<std::shared_ptr<Array>> SelectKUnstable(const Datum& datum,
const SelectKOptions& options,
ExecContext* ctx = NULLPTR);

/// \brief Find insertion indices that preserve sorted order.
///
/// This is a convenience wrapper that calls the "search_sorted" compute
/// function with `values` and `needles` as its two arguments.
///
/// The `values` datum must be a plain array or run-end encoded array sorted in
/// ascending order. `needles` may be a scalar, plain array, or run-end encoded
/// array whose logical value type matches `values`.
///
/// Nulls in `values` are supported when clustered entirely at the start or the
/// end of the sorted array. Non-null needles are matched only against the
/// non-null portion of `values`. Null needles yield null outputs.
///
/// \param[in] values sorted array to search within
/// \param[in] needles scalar or array-like values to search for
/// \param[in] options selects left or right insertion semantics through
/// `SearchSortedOptions::side`: `Left` (the default) requests the leftmost
/// insertion point, `Right` the rightmost
/// \param[in] ctx the function execution context, optional
/// \return insertion indices as uint64 scalar or array
ARROW_EXPORT
Result<Datum> SearchSorted(
const Datum& values, const Datum& needles,
const SearchSortedOptions& options = SearchSortedOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Return the indices that would sort an array.
///
/// Perform an indirect sort of array. The output array will contain
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/initialize.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Status RegisterComputeKernels() {
internal::RegisterVectorNested(registry);
internal::RegisterVectorRank(registry);
internal::RegisterVectorReplace(registry);
internal::RegisterVectorSearchSorted(registry);
internal::RegisterVectorSelectK(registry);
internal::RegisterVectorSort(registry);
internal::RegisterVectorRunEndEncode(registry);
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,13 @@ add_arrow_compute_test(vector_sort_test
arrow_compute_kernels_testing
arrow_compute_testing)

# Test target built from vector_search_sorted_test.cc; links the same two
# testing libraries as the neighbouring vector kernel test targets.
add_arrow_compute_test(vector_search_sorted_test
SOURCES
vector_search_sorted_test.cc
EXTRA_LINK_LIBS
arrow_compute_kernels_testing
arrow_compute_testing)

add_arrow_compute_test(vector_selection_test
SOURCES
vector_selection_test.cc
Expand All @@ -141,6 +148,7 @@ add_arrow_compute_benchmark(vector_sort_benchmark)
add_arrow_compute_benchmark(vector_partition_benchmark)
add_arrow_compute_benchmark(vector_topk_benchmark)
add_arrow_compute_benchmark(vector_replace_benchmark)
add_arrow_compute_benchmark(vector_search_sorted_benchmark)
add_arrow_compute_benchmark(vector_selection_benchmark)

# ----------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ vector_kernel_benchmarks = [
'vector_partition_benchmark',
'vector_topk_benchmark',
'vector_replace_benchmark',
'vector_search_sorted_benchmark',
'vector_selection_benchmark',
]

Expand Down
Loading
Loading