diff --git a/include/oneapi/dpl/experimental/kt/esimd_radix_sort.h b/include/oneapi/dpl/experimental/kt/esimd_radix_sort.h index 7abeac090e5..5e05fa1a687 100644 --- a/include/oneapi/dpl/experimental/kt/esimd_radix_sort.h +++ b/include/oneapi/dpl/experimental/kt/esimd_radix_sort.h @@ -45,7 +45,7 @@ radix_sort(sycl::queue __q, _KeysIterator __keys_first, _KeysIterator __keys_las if (__keys_last - __keys_first < 2) return {}; - auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_rng = __keys_keep(__keys_first, __keys_last).all_view(); auto __pack = __impl::__rng_pack{::std::move(__keys_rng)}; return __impl::__radix_sort<__is_ascending, __radix_bits, /*__in_place=*/true>(__q, __pack, __pack, __param); @@ -77,10 +77,10 @@ radix_sort_by_key(sycl::queue __q, _KeysIterator __keys_first, _KeysIterator __k if (__keys_last - __keys_first < 2) return {}; - auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_rng = __keys_keep(__keys_first, __keys_last).all_view(); - auto __vals_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __vals_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __vals_rng = __vals_keep(__vals_first, __vals_first + (__keys_last - __keys_first)).all_view(); auto __pack = __impl::__rng_pack{::std::move(__keys_rng), ::std::move(__vals_rng)}; return __impl::__radix_sort<__is_ascending, __radix_bits, /*__in_place=*/true>(__q, __pack, __pack, __param); @@ -113,10 +113,10 @@ radix_sort(sycl::queue __q, _KeysIterator1 __keys_first, _KeysIterator1 __keys_l if (__n == 0) return {}; - auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_rng = __keys_keep(__keys_first, __keys_last).all_view(); auto __pack = __impl::__rng_pack{::std::move(__keys_rng)}; - auto __keys_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_out_rng = __keys_out_keep(__keys_out_first, __keys_out_first + __n).all_view(); auto __pack_out = __impl::__rng_pack{::std::move(__keys_out_rng)}; return __impl::__radix_sort<__is_ascending, __radix_bits, /*__in_place=*/false>(__q, ::std::move(__pack), @@ -153,17 +153,17 @@ radix_sort_by_key(sycl::queue __q, _KeysIterator1 __keys_first, _KeysIterator1 _ if (__n == 0) return {}; - auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_rng = __keys_keep(__keys_first, __keys_last).all_view(); - auto __vals_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __vals_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __vals_rng = __vals_keep(__vals_first, __vals_first + __n).all_view(); auto __pack = __impl::__rng_pack{::std::move(__keys_rng), ::std::move(__vals_rng)}; - auto __keys_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_out_rng = __keys_out_keep(__keys_out_first, __keys_out_first + __n).all_view(); - auto __vals_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __vals_out_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __vals_out_rng = __vals_out_keep(__vals_out_first, __vals_out_first + __n).all_view(); auto __pack_out = __impl::__rng_pack{::std::move(__keys_out_rng), ::std::move(__vals_out_rng)}; return __impl::__radix_sort<__is_ascending, __radix_bits, /*__in_place=*/false>(__q, ::std::move(__pack), diff --git a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_dispatchers.h b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_dispatchers.h index 9a1b8ef2740..15a41bc781b 100644 --- a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_dispatchers.h +++ b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_dispatchers.h @@ -328,12 +328,12 @@ __onesweep(sycl::queue __q, _RngPack1&& __pack, _RngPack2&& __pack_out, ::std::s __mem_holder.__allocate(); auto __get_tmp_pack = [&]() { - auto __keys_tmp_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __keys_tmp_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __keys_tmp_rng = __keys_tmp_keep(__mem_holder.__keys_ptr(), __mem_holder.__keys_ptr() + __n).all_view(); if constexpr (__has_values) { - auto __vals_tmp_keep = oneapi::dpl::__ranges::__get_sycl_range(); + auto __vals_tmp_keep = oneapi::dpl::__ranges::__get_sycl_range(); auto __vals_tmp_rng = __vals_tmp_keep(__mem_holder.__vals_ptr(), __mem_holder.__vals_ptr() + __n).all_view(); return __rng_pack(::std::move(__keys_tmp_rng), ::std::move(__vals_tmp_rng)); diff --git a/include/oneapi/dpl/experimental/kt/single_pass_scan.h b/include/oneapi/dpl/experimental/kt/single_pass_scan.h index 7a9e16f76a4..b75c21d5260 100644 --- a/include/oneapi/dpl/experimental/kt/single_pass_scan.h +++ b/include/oneapi/dpl/experimental/kt/single_pass_scan.h @@ -326,9 +326,10 @@ inclusive_scan(sycl::queue __queue, _InIterator __in_begin, _InIterator __in_end { auto __n = __in_end - __in_begin; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _InIterator>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__in_begin, __in_end); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _OutIterator>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__out_begin, __out_begin + __n); return __impl::__single_pass_scan(__queue, __buf1.all_view(), __buf2.all_view(), __binary_op, __param); diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index 1681658d61f..78bce988cb6 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -30,7 +30,8 @@ namespace dpl namespace __internal { -template +template <__par_backend_hetero::access_mode __acc_mode, bool _IsNoInitRequested, typename _BackendTag, + typename _ExecutionPolicy, typename _ForwardIterator, typename _Function> auto __pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) @@ -38,8 +39,7 @@ __pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For auto __n = __last - __first; assert(__n > 0); - auto __keep = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode, _IsNoInitRequested>(); auto __buf = __keep(__first, __last); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -48,10 +48,8 @@ __pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For return __future_obj; } -template <__par_backend_hetero::access_mode __acc_mode1 = __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode __acc_mode2 = __par_backend_hetero::access_mode::write, - typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, - typename _Function> +template <__par_backend_hetero::access_mode __out_acc_mode, bool _IsOutNoInitRequested, typename _BackendTag, + typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, typename _Function> auto __pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) @@ -59,10 +57,10 @@ __pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For auto __n = __last1 - __first1; assert(__n > 0); - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__out_acc_mode, _IsOutNoInitRequested>(); auto __buf2 = __keep2(__first2, __first2 + __n); auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -73,8 +71,9 @@ __pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For return __future.__make_future(__first2 + __n); } -template +template <__par_backend_hetero::access_mode __output_acc_mode, bool _IsOutNoInitRequested, typename _BackendTag, + typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, typename _ForwardIterator3, + typename _Function> auto __pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) @@ -82,14 +81,11 @@ __pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _For auto __n = __last1 - __first1; assert(__n > 0); - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __first2 + __n); - auto __keep3 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>(); + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__output_acc_mode, _IsOutNoInitRequested>(); auto __buf3 = __keep3(__first3, __first3 + __n); auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -106,7 +102,7 @@ auto __pattern_walk2_brick_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { - return __pattern_walk2_async( + return __pattern_walk2_async<__par_backend_hetero::access_mode::write, /*_IsNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __first2, __brick); @@ -129,11 +125,9 @@ __pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& _ using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; auto __n = __last1 - __first1; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __first2 + __n); return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, @@ -159,7 +153,7 @@ __pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& _ using _Functor = unseq_backend::walk_n<_UnaryOperation>; using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, @@ -174,7 +168,7 @@ auto __pattern_fill_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value) { - return __pattern_walk1_async( + return __pattern_walk1_async<__par_backend_hetero::access_mode::read_write, /*_IsNoInitRequested=*/false>( __tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), @@ -195,9 +189,10 @@ __pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy& assert(__first < __last); auto __n = __last - __first; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 054af3f4b82..5acb764abf3 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -47,7 +47,8 @@ transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIt const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_walk2_async( + auto ret_val = oneapi::dpl::__internal::__pattern_walk2_async<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); return ret_val; @@ -65,7 +66,8 @@ transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_walk3_async( + auto ret_val = oneapi::dpl::__internal::__pattern_walk3_async<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); return ret_val; @@ -97,7 +99,7 @@ sort_async(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Comp wait_for_all(::std::forward<_Events>(__dependencies)...); assert(__last - __first >= 2); - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); @@ -128,7 +130,8 @@ for_each_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIter const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_walk1_async( + auto ret_val = oneapi::dpl::__internal::__pattern_walk1_async<__par_backend_hetero::access_mode::read_write, + /*_IsNoInitRequested=*/false>( __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); return ret_val; } diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 711fa785bdc..9fdafe703de 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -194,13 +194,13 @@ lower_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIt const auto value_size = std::distance(value_start, value_end); - auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator1>(); + auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto input_buf = keep_input(start, end); - auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator2>(); + auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto value_buf = keep_values(value_start, value_end); - auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); + auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); const bool use_32bit_indexing = size <= std::numeric_limits::max(); @@ -226,13 +226,13 @@ upper_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIt const auto value_size = std::distance(value_start, value_end); - auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator1>(); + auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto input_buf = keep_input(start, end); - auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator2>(); + auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto value_buf = keep_values(value_start, value_end); - auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); + auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); const bool use_32bit_indexing = size <= std::numeric_limits::max(); @@ -258,13 +258,13 @@ binary_search_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, Input const auto value_size = std::distance(value_start, value_end); - auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator1>(); + auto keep_input = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto input_buf = keep_input(start, end); - auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, InputIterator2>(); + auto keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto value_buf = keep_values(value_start, value_end); - auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); + auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); const bool use_32bit_indexing = size <= std::numeric_limits::max(); diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index c3364667a71..396e1bace73 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -43,17 +43,17 @@ namespace __internal // walk1 //------------------------------------------------------------------------ -template +template <__par_backend_hetero::access_mode __acc_mode, bool _IsNoInitRequested, typename _BackendTag, + typename _ExecutionPolicy, typename _ForwardIterator, typename _Function> void -__pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Function __f) +__pattern_hetero_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { auto __n = __last - __first; if (__n <= 0) return; - auto __keep = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode, _IsNoInitRequested>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -62,6 +62,15 @@ __pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt .__checked_deferrable_wait(); } +template +void +__pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) +{ + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, /*_IsNoInitRequested=*/false>( + __hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); +} + //------------------------------------------------------------------------ // walk1_n //------------------------------------------------------------------------ @@ -72,7 +81,8 @@ _ForwardIterator __pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) { - __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, /*_IsNoInitRequested=*/false>( + __tag, std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); return __first + __n; } @@ -83,31 +93,22 @@ __pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _F // TODO: A tag _WaitMode is used for provide a patterns call pipeline, where the last one should be synchronous // Probably it should be re-designed by a pipeline approach, when a pattern returns some sync objects // and ones are combined into a "pipeline" (probably like Range pipeline) -// -// A note on access mode types below: the __vector_path_impl in unseq_backend::walk_n_vectors_or_scalars only respects -// the default template arguments: -// __acc_mode1 = __par_backend_hetero::access_mode::read -// __acc_mode2 = __par_backend_hetero::access_mode::write -// For any provided _Function object, the default access modes should be respected even if other access modes are -// required due to dependency / synchronization issues. For a detailed explanation, see: -// https://github.com/uxlfoundation/oneDPL/issues/1272 template + __par_backend_hetero::access_mode __output_acc_mode = __par_backend_hetero::access_mode::write, + bool _IsOutNoInitRequested = true, typename _BackendTag, typename _ExecutionPolicy, + typename _ForwardIterator1, typename _ForwardIterator2, typename _Function> _ForwardIterator2 -__pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, - _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) +__pattern_hetero_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) return __first2; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__output_acc_mode, _IsOutNoInitRequested>(); auto __buf2 = __keep2(__first2, __first2 + __n); auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -121,13 +122,26 @@ __pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt return __first2 + __n; } +template +_ForwardIterator2 +__pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) +{ + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( + __hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); +} + template _ForwardIterator2 __pattern_walk2_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f) { - return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f); + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>(__tag, std::forward<_ExecutionPolicy>(__exec), + __first1, __first1 + __n, __first2, __f); } //------------------------------------------------------------------------ @@ -143,12 +157,10 @@ __pattern_swap(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIte if (__n == 0) return __first2; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf2 = __keep2(__first2, __first2 + __n); using _Function = oneapi::dpl::__internal::__swap_fn; @@ -164,32 +176,22 @@ __pattern_swap(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIte // walk3 //------------------------------------------------------------------------ -// A note on access mode types below: the __vector_path_impl in unseq_backend::walk_n_vectors_or_scalars only respects -// the default template arguments: -// __acc_mode1 = __par_backend_hetero::access_mode::read -// __acc_mode2 = __par_backend_hetero::access_mode::read -// __acc_mode3 __par_backend_hetero::access_mode::write -// For any provided _Function object, the default access modes should be respected even if other access modes are -// required due to dependency / synchronization issues. For a detailed explanation, see: -// https://github.com/uxlfoundation/oneDPL/issues/1272 -template _ForwardIterator3 -__pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, - _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +__pattern_hetero_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) return __first3; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode1, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode2, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __first2 + __n); - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__output_acc_mode, _IsOutNoInitRequested>(); auto __buf3 = __keep3(__first3, __first3 + __n); oneapi::dpl::__par_backend_hetero::__parallel_for( @@ -201,6 +203,17 @@ __pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIt return __first3 + __n; } +template +_ForwardIterator3 +__pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +{ + return __pattern_hetero_walk3<__par_backend_hetero::access_mode::write, true>( + __hetero_tag<_BackendTag>{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, + __f); +} + //------------------------------------------------------------------------ // walk_brick, walk_brick_n //------------------------------------------------------------------------ @@ -216,7 +229,7 @@ __pattern_walk_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, if (__last - __first <= 0) return; - __pattern_walk1( + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, false>( __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first, __last, __f); @@ -231,7 +244,7 @@ _ForwardIterator __pattern_walk_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) { - __pattern_walk1( + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, false>( __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first, __first + __n, __f); @@ -251,7 +264,8 @@ _ForwardIterator2 __pattern_walk2_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { - return __pattern_walk2( + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __first2, __brick); @@ -266,7 +280,8 @@ _ForwardIterator2 __pattern_walk2_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Brick __brick) { - return __pattern_walk2( + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __first1 + __n, __first2, __brick); @@ -288,9 +303,9 @@ __pattern_walk2_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. We never actually read from the output // sequence, so there is no risk when ran with the vectorized path of walk_n_vector_or_scalars. For more info, - // please see the comment above __pattern_walk2 and https://github.com/uxlfoundation/oneDPL/issues/1272. - return __pattern_walk2( + // please see the comment above __pattern_hetero_walk2 and https://github.com/uxlfoundation/oneDPL/issues/1272. + return __pattern_hetero_walk2( __tag, __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), @@ -310,9 +325,8 @@ __pattern_walk3_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. We never actually read from the output // sequence, so there is no risk when ran with the vectorized path of walk_n_vector_or_scalars. For more info, - // please see the comment above __pattern_walk3 and https://github.com/uxlfoundation/oneDPL/issues/1272. - return __pattern_walk3<_BackendTag, __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read_write>( + // please see the comment above __pattern_hetero_walk3 and https://github.com/uxlfoundation/oneDPL/issues/1272. + return __pattern_hetero_walk3<__par_backend_hetero::access_mode::read_write, /*_IsOutNoInitRequested=*/false>( __tag, __par_backend_hetero::make_wrapped_policy<__walk3_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), @@ -340,10 +354,11 @@ _ForwardIterator __pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value) { - __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - fill_functor<_T>{__value}); + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, /*_IsNoInitRequested=*/false>( + __tag, std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), + fill_functor<_T>{__value}); return __last; } @@ -377,10 +392,11 @@ _ForwardIterator __pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) { - __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - generate_functor<_Generator>{__g}); + __pattern_hetero_walk1<__par_backend_hetero::access_mode::read_write, /*_IsNoInitRequested=*/false>( + __tag, std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), + generate_functor<_Generator>{__g}); return __last; } @@ -476,7 +492,7 @@ __pattern_min_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Ite __pattern_min_element_reduce_fn<_ReduceValueType, _Compare> __reduce_fn{__comp}; oneapi::dpl::__internal::__pattern_min_element_transform_fn<_ReduceValueType> __transform_fn; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( @@ -527,7 +543,7 @@ __pattern_minmax_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ // a `tuple` of `difference_type`, not the `difference_type` itself. oneapi::dpl::__internal::__pattern_minmax_element_transform_fn<_ReduceValueType> __transform_fn; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, @@ -556,7 +572,7 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _I using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_BinaryPredicate>; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); //a scope lifetime of this instance should be. auto __view = __buf.all_view(); @@ -601,7 +617,7 @@ __pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator // otherwise we can only pass the difference_type as a functor template parameter oneapi::dpl::__internal::__pattern_count_transform_fn<_Predicate> __transform_fn{__predicate}; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, @@ -626,7 +642,7 @@ __pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_Pred>; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); using __size_calc = oneapi::dpl::__ranges::__first_size_calc; @@ -923,9 +939,11 @@ __pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterato typename std::iterator_traits<_Iterator1>::difference_type __n = __last - __first; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result_first, __result_first + __n); std::size_t __num_copied = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, @@ -951,15 +969,15 @@ __pattern_partition_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ _It1DifferenceType __n = __last - __first; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); auto __zipped_res = __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__zipped_res)>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__zipped_res, __zipped_res + __n); auto __result = oneapi::dpl::__par_backend_hetero::__parallel_partition_copy( @@ -995,9 +1013,10 @@ __pattern_unique_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Ite return __result_first + 1; } - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result_first, __result_first + __n); auto __result = oneapi::dpl::__par_backend_hetero::__parallel_unique_copy( @@ -1031,8 +1050,9 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, // __pattern_copy_if above may be async due to there is implicit synchronization on sycl::buffer and the accessors // The temporary buffer is constructed from a range, therefore it's destructor will not block, therefore - // we must call __pattern_walk2 in a way which provides blocking synchronization for this pattern. - return __pattern_walk2( + // we must call __pattern_hetero_walk2 in a way which provides blocking synchronization for this pattern. + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>>{}); } @@ -1054,13 +1074,10 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer // The temporary buffer is constructed from a range, therefore it's destructor will not block, therefore - // we must call __pattern_walk2 in a way which provides blocking synchronization for this pattern. - // We never actually write to the sequence, so there is no risk when ran with the vectorized path of - // walk_n_vector_or_scalars. For more info, please see the comment above __pattern_walk2 and - // https://github.com/uxlfoundation/oneDPL/issues/1272. - return __pattern_walk2( + // we must call __pattern_hetero_walk2 in a way which provides blocking synchronization for this pattern. + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, + __par_backend_hetero::access_mode::read_write, + /*_IsOutNoInitRequested=*/false>( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>>{}); } @@ -1115,7 +1132,7 @@ __pattern_is_partitioned(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ __pattern_is_partitioned_reduce_fn<_ReduceValueType> __reduce_fn; __pattern_is_partitioned_transform_fn<_Predicate> __transform_fn{__predicate}; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, @@ -1221,12 +1238,13 @@ __pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Ite __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>>{}); else { - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __last2); - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf3 = __keep3(__d_first, __d_first + __n); __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), @@ -1268,8 +1286,8 @@ __pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer // The temporary buffer is constructed from a range, therefore it's destructor will not block, therefore - // we must call __pattern_walk2 in a way which provides blocking synchronization for this pattern. - __pattern_walk2( + // we must call __pattern_hetero_walk2 in a way which provides blocking synchronization for this pattern. + __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, true>( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_move<__hetero_tag<_BackendTag>>{}); } @@ -1285,7 +1303,7 @@ __stable_sort_with_projection(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex if (__last - __first < 2) return; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), @@ -1361,16 +1379,19 @@ __pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) - __pattern_walk2(__tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, - copy_result.first, __first, __brick_move<__hetero_tag<_BackendTag>>{}); + __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( + __tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, copy_result.first, + __first, __brick_move<__hetero_tag<_BackendTag>>{}); - __pattern_walk2( + __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __false_result, copy_result.second, __first + true_count, __brick_move<__hetero_tag<_BackendTag>>{}); //TODO: A buffer is constructed from a range, the destructor does not need to block. // The synchronization between these patterns is not required due to the data are being processed independently. - // So, sycl::event::wait(event1, event2) should be call. __pattern_walk2 calls above should be asynchronous and + // So, sycl::event::wait(event1, event2) should be call. __pattern_hetero_walk2 calls above should be asynchronous and // return event1 and event2. return __first + true_count; @@ -1439,10 +1460,10 @@ __pattern_lexicographical_compare(__hetero_tag<_BackendTag>, _ExecutionPolicy&& auto __shared_size = ::std::min(__last1 - __first1, (_Iterator1DifferenceType)(__last2 - __first2)); - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __first1 + __shared_size); - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __first2 + __shared_size); auto __ret_idx = @@ -1552,20 +1573,22 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& if (__in_size == 0 || __out_size == 0) return __out_first; - // TODO: we can avoid a separate __pattern_walk2 for initial copy: it can be done during sort itself + // TODO: we can avoid a separate __pattern_hetero_walk2 for initial copy: it can be done during sort itself // like it's done for CPU version, but it's better to be done together with merge cutoff implementation // as it uses a similar mechanism. if (__in_size <= __out_size) { // If our output buffer is larger than the input buffer, simply copy elements to the output and use // full sort on them. - auto __out_end = __pattern_walk2<__par_backend_hetero::__sync_mode>( - __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, - __brick_copy<__hetero_tag<_BackendTag>>{}); + auto __out_end = + __pattern_hetero_walk2<__par_backend_hetero::__sync_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, + __out_first, __brick_copy<__hetero_tag<_BackendTag>>{}); - // TODO: __pattern_walk2 is a blocking call here, so there is a synchronization between the patterns. + // TODO: __pattern_hetero_walk2 is a blocking call here, so there is a synchronization between the patterns. // But, when the input iterators are a kind of hetero iterator on top of sycl::buffer, SYCL - // runtime makes a dependency graph. In that case the call of __pattern_walk2 could be changed to + // runtime makes a dependency graph. In that case the call of __pattern_hetero_walk2 could be changed to // be asynchronous for better performance. // Use regular sort as partial_sort isn't required to be stable. @@ -1587,15 +1610,17 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& auto __buf_first = __buf.get(); - auto __buf_last = __pattern_walk2<__par_backend_hetero::__async_mode>( - __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, - __brick_copy<__hetero_tag<_BackendTag>>{}); + auto __buf_last = + __pattern_hetero_walk2<__par_backend_hetero::__async_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, + __buf_first, __brick_copy<__hetero_tag<_BackendTag>>{}); auto __buf_mid = __buf_first + __out_size; // An explicit wait between the patterns isn't required here because we are working a with temporary // sycl::buffer and sycl accessors. SYCL runtime makes a dependency graph to prevent the races between - // the patterns: __pattern_walk2, __parallel_partial_sort and __pattern_walk2. + // the patterns: __pattern_hetero_walk2, __parallel_partial_sort and __pattern_hetero_walk2. __par_backend_hetero::__parallel_partial_sort( _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), @@ -1603,12 +1628,13 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); - return __pattern_walk2( + return __pattern_hetero_walk2<__par_backend_hetero::__deferrable_mode, __par_backend_hetero::access_mode::write, + /*_IsOutNoInitRequested=*/true>( __tag, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), __buf_first, __buf_mid, __out_first, __brick_copy<__hetero_tag<_BackendTag>>{}); // The temporary buffer is constructed from a range, therefore it's destructor will not block, therefore - // we must call __pattern_walk2 in a way which provides blocking synchronization for this pattern. + // we must call __pattern_hetero_walk2 in a way which provides blocking synchronization for this pattern. } } @@ -1642,7 +1668,7 @@ __pattern_reverse(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterato if (__n <= 1) return; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), @@ -1664,11 +1690,10 @@ __pattern_reverse_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Bi if (__n <= 0) return __result; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result, __result + __n); oneapi::dpl::__par_backend_hetero::__parallel_for( _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), @@ -1701,7 +1726,7 @@ __pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator using _Tp = typename ::std::iterator_traits<_Iterator>::value_type; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_Tp>(__n); @@ -1744,11 +1769,10 @@ __pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Bid if (__n <= 0) return __result; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _BidirectionalIterator>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result, __result + __n); const auto __shift = __new_first - __first; @@ -1786,14 +1810,13 @@ __pattern_hetero_set_op(__hetero_tag<_BackendTag>, _SetTag __set_tag, _Execution __output_size = __n1 + __n2; } - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __last2); - auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _OutputIterator>(); + auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf3 = __keep3(__result, __result + __output_size); _SizeType __result_size = __par_backend_hetero::__parallel_set_op<_SetTag>( @@ -2006,7 +2029,7 @@ __pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, if (__n >= __size) return __first; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), @@ -2026,7 +2049,7 @@ __pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec if (__n >= __size) return __last; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); //A shift right is the shift left with a reverse logic. @@ -2064,15 +2087,14 @@ __pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& return 1; } - auto __keep_keys = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep_keys = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __keys = __keep_keys(__keys_first, __keys_last); - auto __keep_values = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); + auto __keep_values = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __values = __keep_values(__values_first, __values_first + __n); - auto __keep_key_outputs = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator3>(); + auto __keep_key_outputs = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __out_keys = __keep_key_outputs(__out_keys_first, __out_keys_first + __n); auto __keep_value_outputs = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator4>(); + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __out_values = __keep_value_outputs(__out_values_first, __out_values_first + __n); return oneapi::dpl::__par_backend_hetero::__parallel_reduce_by_segment( _BackendTag{}, std::forward<_ExecutionPolicy>(__exec), __keys.all_view(), __values.all_view(), @@ -2095,12 +2117,11 @@ __pattern_scan_by_segment_impl(__hetero_tag<_BackendTag>, _Policy&& __policy, _I namespace __bknd = oneapi::dpl::__par_backend_hetero; - auto __keep_keys = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, _InputIterator1>(); + auto __keep_keys = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto __key_buf = __keep_keys(__first1, __last1); - auto __keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read, _InputIterator2>(); + auto __keep_values = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read>(); auto __value_buf = __keep_values(__first2, __first2 + __n); - auto __keep_value_outputs = - oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, _OutputIterator>(); + auto __keep_value_outputs = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write>(); auto __value_output_buf = __keep_value_outputs(__result, __result + __n); __bknd::__parallel_scan_by_segment<_Inclusive::value>( diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index a59528cc84d..43c8fa2e597 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -1146,8 +1146,8 @@ __set_write_a_only_op(oneapi::dpl::unseq_backend::_UnionTag, _UseReduceThenScan, // temporary buffer to store intermediate result oneapi::dpl::__par_backend_hetero::__buffer<_ValueType> __diff(__n2); auto __buf = __diff.get(); - auto __keep_tmp1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__buf)>(); + auto __keep_tmp1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __tmp_rng1 = __keep_tmp1(__buf, __buf + __n2); //1. Calc difference {2} \ {1} @@ -1166,8 +1166,7 @@ __set_write_a_only_op(oneapi::dpl::unseq_backend::_UnionTag, _UseReduceThenScan, else { // merge if elements are in diff - auto __keep_tmp2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, decltype(__buf)>(); + auto __keep_tmp2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __tmp_rng2 = __keep_tmp2(__buf, __buf + __n_diff); oneapi::dpl::__par_backend_hetero::__parallel_merge_impl<__set_union_merge_wrapper<_CustomName>>( __q, std::forward<_Range1>(__rng1), __tmp_rng2.all_view(), std::forward<_Range3>(__result), __comp, __proj1, @@ -1207,10 +1206,10 @@ __set_write_a_only_op(oneapi::dpl::unseq_backend::_SymmetricDifferenceTag, _UseR oneapi::dpl::__par_backend_hetero::__buffer<_ValueType2> __diff_2(__n2); auto __buf_2 = __diff_2.get(); - auto __keep_tmp1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__buf_1)>(); - auto __keep_tmp2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, decltype(__buf_2)>(); + auto __keep_tmp1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); + auto __keep_tmp2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __tmp_rng1 = __keep_tmp1(__buf_1, __buf_1 + __n1); auto __tmp_rng2 = __keep_tmp2(__buf_2, __buf_2 + __n2); @@ -1226,10 +1225,8 @@ __set_write_a_only_op(oneapi::dpl::unseq_backend::_SymmetricDifferenceTag, _UseR oneapi::dpl::unseq_backend::_DifferenceTag{}, __q, std::forward<_Range2>(__rng2), std::forward<_Range1>(__rng1), __tmp_rng2.all_view(), __comp, __proj2, __proj1); - auto __keep_tmp3 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, decltype(__buf_1)>(); - auto __keep_tmp4 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, decltype(__buf_2)>(); + auto __keep_tmp3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); + auto __keep_tmp4 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); //3. Merge the differences if (__n_diff_1 == 0 && __n_diff_2 == 0) @@ -2109,7 +2106,7 @@ __parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag, _Executio { const auto __mid_idx = __mid - __first; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write>(); auto __buf = __keep(__first, __last); return __parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag{}, diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h index 114bcbdb3d1..c0e86d8ccea 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h @@ -134,33 +134,34 @@ struct sycl_iterator // map access_mode tag to access_mode value // TODO: consider removing the logic for discard_read_write and discard_write which are deprecated in SYCL 2020 -template +template struct __access_mode_resolver { }; -template -struct __access_mode_resolver, _NoInitT> +template +struct __access_mode_resolver, _IsNoInitRequestedT> { static constexpr access_mode __value = access_mode::read; }; -template -struct __access_mode_resolver, _NoInitT> +template +struct __access_mode_resolver, _IsNoInitRequestedT> { static constexpr access_mode __value = - std::is_same_v<_NoInitT, __dpl_sycl::__no_init> ? access_mode::discard_write : access_mode::write; + std::is_same_v<_IsNoInitRequestedT, __dpl_sycl::__no_init> ? access_mode::discard_write : access_mode::write; }; -template -struct __access_mode_resolver, _NoInitT> +template +struct __access_mode_resolver, _IsNoInitRequestedT> { - static constexpr access_mode __value = - std::is_same_v<_NoInitT, __dpl_sycl::__no_init> ? access_mode::discard_read_write : access_mode::read_write; + static constexpr access_mode __value = std::is_same_v<_IsNoInitRequestedT, __dpl_sycl::__no_init> + ? access_mode::discard_read_write + : access_mode::read_write; }; -template -constexpr access_mode __access_mode_resolver_v = __access_mode_resolver<_ModeTagT, _NoInitT>::__value; +template +constexpr access_mode __access_mode_resolver_v = __access_mode_resolver<_ModeTagT, _IsNoInitRequestedT>::__value; template ::value_type>> using __default_alloc_vec_iter = typename std::vector::iterator; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index 96159c7ee32..1fa148e79ad 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -406,7 +406,7 @@ struct __range_holder } }; -template //TODO: _Iterator is not used and should be removed +template struct __get_sycl_range { __get_sycl_range() @@ -414,17 +414,19 @@ struct __get_sycl_range m_buffers.reserve(4); //4 - due to a number of arguments(host iterators) cannot be too big. } - private: - // We have to keep sycl buffer(s) instance here by sync reasons; - std::vector> m_buffers; - - template + template static constexpr bool __is_copy_direct_v = - _LocalAccMode == sycl::access::mode::read_write || _LocalAccMode == sycl::access::mode::read; + !_LocalNoInit && (_LocalAccMode == sycl::access::mode::read_write || + _LocalAccMode == sycl::access::mode::read || _LocalAccMode == sycl::access::mode::write); + template static constexpr bool __is_copy_back_v = _LocalAccMode == sycl::access::mode::read_write || _LocalAccMode == sycl::access::mode::write; + private: + // We have to keep sycl buffer(s) instance here by sync reasons; + std::vector> m_buffers; + //SFINAE iterator type checks template static constexpr auto @@ -452,7 +454,7 @@ struct __get_sycl_range } //zip iterators - template + template auto __process_input_iter(oneapi::dpl::zip_iterator __first, oneapi::dpl::zip_iterator __last) { @@ -464,14 +466,14 @@ struct __get_sycl_range } //specialization for transform_iterator - template + template auto __process_input_iter(oneapi::dpl::transform_iterator<_Iter, _UnaryFunction> __first, oneapi::dpl::transform_iterator<_Iter, _UnaryFunction> __last) { assert(__first < __last); - auto res = __process_input_iter<_LocalAccMode>(__first.base(), __last.base()); + auto res = __process_input_iter<_LocalAccMode, _LocalNoInit>(__first.base(), __last.base()); auto rng = oneapi::dpl::__ranges::transform_view_simple{ res.all_view(), __first.functor()}; @@ -479,13 +481,13 @@ struct __get_sycl_range } //specialization for std::reverse_iterator - template + template auto __process_input_iter(::std::reverse_iterator<_Iter> __first, ::std::reverse_iterator<_Iter> __last) { assert(__first < __last); - auto __res = __process_input_iter<_LocalAccMode>(__last.base(), __first.base()); + auto __res = __process_input_iter<_LocalAccMode, _LocalNoInit>(__last.base(), __first.base()); auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__res.all_view()}; return __range_holder{__rng}; @@ -504,13 +506,14 @@ struct __get_sycl_range auto __get_permutation_view(_R __r, _Map __m, _Size __s) { - //For permutation iterator, the Map iterator is always read (only) - auto view_map = __process_input_iter(__m, __m + __s).all_view(); + //For permutation iterator, the Map iterator is always read (only) without no_init + auto view_map = + __process_input_iter(__m, __m + __s).all_view(); return oneapi::dpl::__ranges::permutation_view_simple<_R, decltype(view_map)>{__r, view_map}; } //specialization for permutation_iterator using sycl_iterator as source - template , int> = 0> auto __process_input_iter(oneapi::dpl::permutation_iterator<_It, _Map> __first, @@ -528,8 +531,8 @@ struct __get_sycl_range // offset, and use that to recurse as a sycl_iterator over the __base_buffer. auto __base_iter = __first.base(); auto __base_buffer = __base_iter.get_buffer(); - auto res_src = __process_input_iter<_LocalAccMode>(oneapi::dpl::begin(__base_buffer) + __base_iter.get_idx(), - oneapi::dpl::end(__base_buffer)); + auto res_src = __process_input_iter<_LocalAccMode, _LocalNoInit>( + oneapi::dpl::begin(__base_buffer) + __base_iter.get_idx(), oneapi::dpl::end(__base_buffer)); //_Map is handled by recursively calling __get_sycl_range() in __get_permutation_view. auto rng = __get_permutation_view(res_src.all_view(), __first.map(), __n); @@ -538,7 +541,7 @@ struct __get_sycl_range } //specialization for permutation_iterator using USM pointer or direct pass object as source - template && oneapi::dpl::__ranges::__is_passed_directly_device_ready_v<_Iter>, int> = 0> @@ -558,7 +561,7 @@ struct __get_sycl_range // specialization for general case, permutation_iterator with base iterator that is not sycl_iterator or // device accessible content iterators. - template && !oneapi::dpl::__ranges::__is_passed_directly_device_ready_v<_Iter>, int> = 0> @@ -581,7 +584,7 @@ struct __get_sycl_range } //specialization for permutation discard iterator - template + template auto __process_input_iter(oneapi::dpl::permutation_iterator __first, oneapi::dpl::permutation_iterator __last) @@ -595,7 +598,7 @@ struct __get_sycl_range } // for raw pointers and direct pass objects (for example, counting_iterator, iterator of USM-containers) - template + template std::enable_if_t, __range_holder>> __process_input_iter(_Iter __first, _Iter __last) @@ -606,12 +609,14 @@ struct __get_sycl_range } //specialization for hetero iterator - template + template auto __process_input_iter(_Iter __first, _Iter __last) -> std::enable_if_t, __range_holder, _LocalAccMode>>> { + static_assert(!(_LocalAccMode == sycl::access::mode::read && _LocalNoInit), + "Read mode cannot be used with no_init property."); assert(__first < __last); using value_type = val_t<_Iter>; @@ -633,17 +638,19 @@ struct __get_sycl_range } //SFINAE-overload for a contiguous host iterator - template + template auto __process_input_iter(_Iter __first, _Iter __last) -> ::std::enable_if_t::value && __is_addressable_v<_Iter> && !is_zip<_Iter>::value && !is_permutation<_Iter>::value, __range_holder, _LocalAccMode>>> { + static_assert(!(_LocalAccMode == sycl::access::mode::read && _LocalNoInit), + "Read mode cannot be used with no_init property."); using _T = val_t<_Iter>; - return __process_host_iter_impl<_LocalAccMode>(__first, __last, [&]() { - if constexpr (__is_copy_direct_v<_LocalAccMode>) + return __process_host_iter_impl<_LocalAccMode, _LocalNoInit>(__first, __last, [&]() { + if constexpr (__is_copy_direct_v<_LocalAccMode, _LocalNoInit>) { //wait and copy on a buffer destructor; an exclusive access buffer, good performance return sycl::buffer<_T, 1>{::std::addressof(*__first), __last - __first}; @@ -662,7 +669,7 @@ struct __get_sycl_range } //SFINAE-overload for non-contiguous host iterator - template + template auto __process_input_iter(_Iter __first, _Iter __last) -> ::std::enable_if_t::value && !__is_addressable_v<_Iter> && !is_zip<_Iter>::value && @@ -671,8 +678,8 @@ struct __get_sycl_range { using _T = val_t<_Iter>; - return __process_host_iter_impl<_LocalAccMode>(__first, __last, [&]() { - if constexpr (__is_copy_direct_v<_LocalAccMode>) + return __process_host_iter_impl<_LocalAccMode, _LocalNoInit>(__first, __last, [&]() { + if constexpr (__is_copy_direct_v<_LocalAccMode, _LocalNoInit>) { //This constructor requires an extra host-side copy as compared to the host pointer + size constructors sycl::buffer<_T, 1> __buf(__first, __last); //SYCL API for non-contiguous iterators @@ -692,7 +699,7 @@ struct __get_sycl_range } //implementation of operator()(_Iter __first, _Iter __last) for the host iterator types - template + template auto __process_host_iter_impl([[maybe_unused]] _Iter __first, [[maybe_unused]] _Iter __last, _GetBufferFunc __get_buf) { @@ -720,8 +727,8 @@ struct __get_sycl_range auto operator()(_ArgTypes... __args) { - //when called using operator(), use access mode provided by the struct template parameter - return __process_input_iter(::std::forward<_ArgTypes>(__args)...); + //when called using operator(), use access mode and no_init flag provided by the struct template parameters + return __process_input_iter(std::forward<_ArgTypes>(__args)...); } }; diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index a42b46e18d9..f3e6f4470b3 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -104,8 +104,7 @@ auto __make_binhash_manager(oneapi::dpl::__internal::__custom_boundary_binhash<_RandomAccessIterator>&& __bin_hash) { auto __buffer_lifetime_holder = - oneapi::dpl::__ranges::__get_sycl_range(); + oneapi::dpl::__ranges::__get_sycl_range(); auto __range_holder = __buffer_lifetime_holder(__bin_hash.__boundary_first, __bin_hash.__boundary_last); auto __bin_hash_range = oneapi::dpl::__par_backend_hetero::__custom_boundary_range_binhash{__range_holder.all_view()}; @@ -131,10 +130,9 @@ __pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Rando // The access mode we we want here is "read_write" + no_init property to cover the reads required by the main // kernel, but also to avoid copying the data in unnecessarily. In practice, this "write" access mode should // accomplish this as write implies read, and we avoid a copy-in from the host for "write" access mode. - // TODO: Add no_init property to get_sycl_range to allow expressivity we need here. auto __keep_bins = oneapi::dpl::__ranges::__get_sycl_range(); + /*_IsNoInitRequested=*/true>(); auto __bins_buf = __keep_bins(__histogram_first, __histogram_first + __num_bins); auto __bins = __bins_buf.all_view(); @@ -153,8 +151,7 @@ __pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Rando // __make_binhash_manager will call __get_sycl_range for any data which requires it within __func auto __binhash_manager = __make_binhash_manager(::std::forward<_BinHash>(__func)); auto __keep_input = - oneapi::dpl::__ranges::__get_sycl_range(); + oneapi::dpl::__ranges::__get_sycl_range(); auto __input_buf = __keep_input(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_histogram( diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 6730a41fb7c..477fab764b3 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -51,11 +51,9 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; auto __n = __last1 - __first1; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first1, __last1); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _RandomAccessIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf2 = __keep2(__first2, __first2 + __n); return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, @@ -83,7 +81,7 @@ __pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, using _Functor = unseq_backend::walk_n<_UnaryOperation>; using _RepackedTp = __par_backend_hetero::__repacked_tuple_t<_Tp>; - auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator>(); + auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, @@ -134,14 +132,15 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& const auto __n = __last - __first; - auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); // This is a temporary workaround for an in-place exclusive scan while the SYCL backend scan pattern is not fixed. const bool __is_scan_inplace_exclusive = __n > 1 && !_Inclusive{} && __iterators_possibly_equal(__first, __result); if (!__is_scan_inplace_exclusive) { - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__result, __result + __n); oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( @@ -164,7 +163,8 @@ __pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& oneapi::dpl::__par_backend_hetero::__buffer<_Type> __tmp_buf(__n); auto __first_tmp = __tmp_buf.get(); auto __last_tmp = __first_tmp + __n; - auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__first_tmp, __last_tmp); // Run main algorithm and save data into temporary buffer @@ -250,11 +250,10 @@ __pattern_adjacent_difference(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __ex { oneapi::dpl::__internal::__transform_functor<_BinaryOperation, std::true_type> __fn{__op}; - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read>(); auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); + auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, + /*_IsNoInitRequested=*/true>(); auto __buf2 = __keep2(__d_first, __d_last); using _Function = unseq_backend::walk_adjacent_difference; diff --git a/test/general/implementation_details/get_sycl_range.pass.cpp b/test/general/implementation_details/get_sycl_range.pass.cpp new file mode 100644 index 00000000000..c33c8680e7a --- /dev/null +++ b/test/general/implementation_details/get_sycl_range.pass.cpp @@ -0,0 +1,165 @@ +// -*- C++ -*- +//===-- get_sycl_range.pass.cpp -------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +#include "support/test_config.h" + +#include "support/utils.h" + +#if TEST_DPCPP_BACKEND_PRESENT + +#include + +// Test the compile-time traits __is_copy_direct_v and __is_copy_back_v +// used by __get_sycl_range to determine buffer copy behavior. + +void +test_is_copy_direct_v() +{ + // __is_copy_direct_v determines whether data should be copied FROM host TO device + // when creating a SYCL buffer. It should be true when: + // - _IsNoInitRequested is false AND access mode implies reading (read, write, or read_write) + // + // Key change in this PR: write mode copies in by default (matching SYCL standard) + // unless _IsNoInitRequested=true is specified. + + using read_mode = oneapi::dpl::__ranges::__get_sycl_range; + using write_mode = oneapi::dpl::__ranges::__get_sycl_range; + using read_write_mode = oneapi::dpl::__ranges::__get_sycl_range; + + using write_mode_no_init = + oneapi::dpl::__ranges::__get_sycl_range; + using read_write_mode_no_init = + oneapi::dpl::__ranges::__get_sycl_range; + + // Test: read mode without no_init -> copy in (true) + static_assert(read_mode::__is_copy_direct_v == true, + "read mode without no_init should copy in"); + + // Test: write mode without no_init -> copy in (true) + static_assert(write_mode::__is_copy_direct_v == true, + "write mode without no_init should copy in"); + + // Test: write mode with no_init -> no copy in (false) + static_assert(write_mode_no_init::__is_copy_direct_v == false, + "write mode with no_init should not copy in"); + + // Test: read_write mode without no_init -> copy in (true) + static_assert(read_write_mode::__is_copy_direct_v == true, + "read_write mode without no_init should copy in"); + + // Test: read_write mode with no_init -> no copy in (false) + static_assert(read_write_mode_no_init::__is_copy_direct_v == false, + "read_write mode with no_init should not copy in"); +} + +void +test_is_copy_back_v() +{ + // __is_copy_back_v determines whether data should be copied FROM device TO host + // when the SYCL buffer is destroyed. It should be true when: + // - access mode is write or read_write + + using read_mode = oneapi::dpl::__ranges::__get_sycl_range; + using write_mode = oneapi::dpl::__ranges::__get_sycl_range; + using read_write_mode = oneapi::dpl::__ranges::__get_sycl_range; + + // Test: read mode -> no copy back (false) + static_assert(read_mode::__is_copy_back_v == false, + "read mode should not copy back"); + + // Test: write mode -> copy back (true) + static_assert(write_mode::__is_copy_back_v == true, + "write mode should copy back"); + + // Test: read_write mode -> copy back (true) + static_assert(read_write_mode::__is_copy_back_v == true, + "read_write mode should copy back"); + + // Verify __is_copy_back_v does NOT depend on _IsNoInitRequested + using write_mode_no_init = + oneapi::dpl::__ranges::__get_sycl_range; + using read_write_mode_no_init = + oneapi::dpl::__ranges::__get_sycl_range; + + // Test: write mode with no_init -> still copy back (true) + static_assert(write_mode_no_init::__is_copy_back_v == true, + "write mode with no_init should still copy back"); + + // Test: read_write mode with no_init -> still copy back (true) + static_assert(read_write_mode_no_init::__is_copy_back_v == true, + "read_write mode with no_init should still copy back"); +} + +void +test_traits_use_local_parameters() +{ + // The traits __is_copy_direct_v and __is_copy_back_v are static and depend only on their + // template parameters (_LocalAccMode, _LocalNoInit), NOT on the struct's template parameters + // (AccMode, _IsNoInitRequested). This is important because when processing nested iterators like + // permutation_iterator, the map iterator is always processed with read mode regardless of + // the outer access mode. + + // Use a write mode struct but query with read mode parameters (like permutation map iterator) + using write_no_init_struct = + oneapi::dpl::__ranges::__get_sycl_range; + + // Even though the struct is write+no_init, querying with read without no_init should give read behavior + static_assert(write_no_init_struct::__is_copy_direct_v == true, + "local read mode should copy in regardless of struct's no_init mode"); + static_assert(write_no_init_struct::__is_copy_back_v == false, + "local read mode should not copy back regardless of struct's no_init mode"); + + // And querying with write+no_init should give write+no_init behavior + static_assert(write_no_init_struct::__is_copy_direct_v == false, + "local write+no_init should not copy in"); + static_assert(write_no_init_struct::__is_copy_back_v == true, + "local write mode should copy back"); + + // Use a read mode struct but query with write mode parameters + using read_struct = oneapi::dpl::__ranges::__get_sycl_range; + + static_assert(read_struct::__is_copy_direct_v == true, + "local write mode should copy in regardless of struct's read mode"); + static_assert(read_struct::__is_copy_back_v == true, + "local write mode should copy back regardless of struct's read mode"); +} + +void +test_default_template_parameter() +{ + // Verify that _IsNoInitRequested defaults to false + using write_mode_default = oneapi::dpl::__ranges::__get_sycl_range; + using write_mode_explicit_false = oneapi::dpl::__ranges::__get_sycl_range; + + // Both should have the same copy-in behavior (copy in enabled) + static_assert(write_mode_default::__is_copy_direct_v == + write_mode_explicit_false::__is_copy_direct_v, + "default _IsNoInitRequested should be false"); +} + +#endif // TEST_DPCPP_BACKEND_PRESENT + +int +main() +{ +#if TEST_DPCPP_BACKEND_PRESENT + test_is_copy_direct_v(); + test_is_copy_back_v(); + test_traits_use_local_parameters(); + test_default_template_parameter(); +#endif + + return TestUtils::done(TEST_DPCPP_BACKEND_PRESENT); +} diff --git a/test/parallel_api/iterator/input_data_sweep.h b/test/parallel_api/iterator/input_data_sweep.h index f23c7149ae2..254d4ae15dc 100644 --- a/test/parallel_api/iterator/input_data_sweep.h +++ b/test/parallel_api/iterator/input_data_sweep.h @@ -49,20 +49,48 @@ struct get_expected_op // Attention: // We cannot use oneapi::dpl::identity here because it returns the reference it accepted as argument. // Such functors cannot be used within transform_iterator in combination with a source iterator -// which returns some prvalue when dereferenced (i.e. counting_iterator or zip_iterator). -// This combination returns a dangling reference and results in undefined behavior. +// which returns some prvalue when dereferenced (i.e. counting_iterator or zip_iterator). +// This combination returns a dangling reference and results in undefined behavior. // Instead, we use a functor which copies the returned value. inline constexpr auto noop = [](auto i) { return i; }; +// Helper function to verify that guard region at the end of output buffer hasn't been overwritten +template +void +verify_guard_region(Policy&& exec, Iterator guard_start, Size guard_size, T sentinel_value, + const std::string& input_descr, const char* operation_type) +{ + if (guard_size > 0) + { + std::vector guard_check(guard_size); + oneapi::dpl::copy(std::forward(exec), guard_start, guard_start + guard_size, guard_check.begin()); + + for (Size i = 0; i < guard_size; ++i) + { + if (!TestUtils::is_equal_val(guard_check[i], sentinel_value)) + { + std::stringstream msg; + msg << "Buffer overflow detected in " << operation_type << " from " << input_descr + << " at guard position " << i << ": expected " << sentinel_value << ", got " << guard_check[i]; + TestUtils::issue_error_message(msg); + } + } +# if _ONEDPL_DEBUG_SYCL + std::cout << " guard region verified (" << guard_size << " elements)"; +# endif + } +} + template + typename T = typename std::iterator_traits::value_type> void wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIterator2 copy_from_first, OutputIterator copy_to_first, OriginalIterator1 orig_first, OriginalIterator2 orig_out_first, - ExpectedIterator expected_first, T trash, const std::string& input_descr) + ExpectedIterator expected_first, T trash, const std::string& input_descr, std::size_t guard_size = 0, + T sentinel_value = {}) { oneapi::dpl::counting_iterator counting(size_t{0}); @@ -77,7 +105,11 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter if constexpr (__read) { + // Initialize working area with trash, guard region with sentinel oneapi::dpl::fill(CLONE_TEST_POLICY_IDX(exec, 0), orig_out_first, orig_out_first + n, trash); + oneapi::dpl::fill(CLONE_TEST_POLICY_IDX(exec, 0), orig_out_first + n, orig_out_first + n + guard_size, + sentinel_value); + if constexpr (__reset_read) { //Reset data if required @@ -92,6 +124,11 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string msg = std::string("wrong read effect from ") + input_descr; //verify result using original unwrapped output EXPECT_EQ_N(expect, orig_out_first, n, msg.c_str()); + + // Verify guard region + verify_guard_region(CLONE_TEST_POLICY_IDX(exec, 0), orig_out_first + n, guard_size, sentinel_value, input_descr, + "read"); + # if _ONEDPL_DEBUG_SYCL std::cout << " read pass,"; # endif // _ONEDPL_DEBUG_SYCL @@ -101,8 +138,10 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter //Reset data if constexpr (__check_write) { - //only reset output data if we intend to check it afterward + // Initialize working area with trash, guard region with sentinel oneapi::dpl::fill(CLONE_TEST_POLICY_IDX(exec, 3), orig_first, orig_first + n, trash); + oneapi::dpl::fill(CLONE_TEST_POLICY_IDX(exec, 3), orig_first + n, orig_first + n + guard_size, + sentinel_value); } oneapi::dpl::copy(CLONE_TEST_POLICY_IDX(exec, 4), copy_from_first, copy_from_first + n, first); @@ -118,6 +157,11 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string msg = std::string("wrong write effect from ") + input_descr; //verify copied back data EXPECT_EQ_N(expect, copy_back.begin(), n, msg.c_str()); + + // Verify guard region + verify_guard_region(CLONE_TEST_POLICY_IDX(exec, 3), orig_first + n, guard_size, sentinel_value, input_descr, + "write"); + # if _ONEDPL_DEBUG_SYCL std::cout << " write pass"; # endif // _ONEDPL_DEBUG_SYCL @@ -155,9 +199,9 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string new_input_descr = std::string("std::reverse(") + input_descr + std::string(")"); //TODO: Look at device copyability of std::reverse_iterator and re-enable recurse wrap_recurse<0, __reverses + 1, __read, __reset_read, __write, __check_write, __usable_as_perm_map, - __usable_as_perm_src, __is_reversible>(CLONE_TEST_POLICY_IDX(exec, 6), reversed_first, reversed_last, copy_from_first, - copy_to_first, orig_first, orig_out_first, - expected_first, trash, new_input_descr); + __usable_as_perm_src, __is_reversible>( + CLONE_TEST_POLICY_IDX(exec, 6), reversed_first, reversed_last, copy_from_first, copy_to_first, + orig_first, orig_out_first, expected_first, trash, new_input_descr, guard_size, sentinel_value); } { //transform_iterator(it,noop) @@ -165,8 +209,8 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string new_input_descr = std::string("transform_iterator(") + input_descr + std::string(", noop)"); wrap_recurse<__recurse - 1, __reverses, __read, __reset_read, /*__write=*/false, __check_write, __usable_as_perm_map, __usable_as_perm_src, __is_reversible>( - CLONE_TEST_POLICY_IDX(exec, 7), trans, trans + n, discard, copy_to_first, orig_first, orig_out_first, expected_first, trash, - new_input_descr); + CLONE_TEST_POLICY_IDX(exec, 7), trans, trans + n, discard, copy_to_first, orig_first, orig_out_first, + expected_first, trash, new_input_descr, guard_size, sentinel_value); } if constexpr (__usable_as_perm_src) @@ -174,9 +218,9 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string new_input_descr = std::string("permutation_iterator(") + input_descr + std::string(", noop)"); auto perm = oneapi::dpl::make_permutation_iterator(first, noop); wrap_recurse<__recurse - 1, __reverses, __read, __reset_read, __write, __check_write, __usable_as_perm_map, - __usable_as_perm_src, __is_reversible>(CLONE_TEST_POLICY_IDX(exec, 8), perm, perm + n, copy_from_first, copy_to_first, - orig_first, orig_out_first, expected_first, trash, - new_input_descr); + __usable_as_perm_src, __is_reversible>( + CLONE_TEST_POLICY_IDX(exec, 8), perm, perm + n, copy_from_first, copy_to_first, orig_first, + orig_out_first, expected_first, trash, new_input_descr, guard_size, sentinel_value); } if constexpr (__usable_as_perm_src) @@ -185,9 +229,9 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter std::string("permutation_iterator(") + input_descr + std::string(", counting_iterator)"); auto perm = oneapi::dpl::make_permutation_iterator(first, counting); wrap_recurse<__recurse - 1, __reverses, __read, __reset_read, __write, __check_write, __usable_as_perm_map, - __usable_as_perm_src, __is_reversible>(CLONE_TEST_POLICY_IDX(exec, 9), perm, perm + n, copy_from_first, copy_to_first, - orig_first, orig_out_first, expected_first, trash, - new_input_descr); + __usable_as_perm_src, __is_reversible>( + CLONE_TEST_POLICY_IDX(exec, 9), perm, perm + n, copy_from_first, copy_to_first, orig_first, + orig_out_first, expected_first, trash, new_input_descr, guard_size, sentinel_value); } if constexpr (__usable_as_perm_map) @@ -197,8 +241,8 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter auto perm = oneapi::dpl::make_permutation_iterator(counting, first); wrap_recurse<__recurse - 1, __reverses, __read, __reset_read, /*__write=*/false, __check_write, __usable_as_perm_map, __usable_as_perm_src, __is_reversible>( - CLONE_TEST_POLICY_IDX(exec, 10), perm, perm + n, discard, copy_to_first, orig_first, orig_out_first, expected_first, trash, - new_input_descr); + CLONE_TEST_POLICY_IDX(exec, 10), perm, perm + n, discard, copy_to_first, orig_first, orig_out_first, + expected_first, trash, new_input_descr, guard_size, sentinel_value); } { //zip_iterator(counting_iterator,it) @@ -208,8 +252,8 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter auto zip_out = oneapi::dpl::make_zip_iterator(discard, copy_to_first); wrap_recurse<__recurse - 1, __reverses, __read, __reset_read, /*__write=*/false, __check_write, /*__usable_as_perm_map=*/false, __usable_as_perm_src, __is_reversible>( - CLONE_TEST_POLICY_IDX(exec, 11), zip, zip + n, discard, zip_out, orig_first, orig_out_first, expected_first, trash, - new_input_descr); + CLONE_TEST_POLICY_IDX(exec, 11), zip, zip + n, discard, zip_out, orig_first, orig_out_first, + expected_first, trash, new_input_descr, guard_size, sentinel_value); } { //zip_iterator(it, discard_iterator) @@ -219,8 +263,8 @@ wrap_recurse(Policy&& exec, InputIterator1 first, InputIterator1 last, InputIter auto zip_in = oneapi::dpl::make_zip_iterator(copy_from_first, counting); wrap_recurse<__recurse - 1, __reverses, /*__read=*/false, false, __write, __check_write, /*__usable_as_perm_map=*/false, __usable_as_perm_src, __is_reversible>( - CLONE_TEST_POLICY_IDX(exec, 12), zip, zip + n, zip_in, discard, orig_first, orig_out_first, expected_first, trash, - new_input_descr); + CLONE_TEST_POLICY_IDX(exec, 12), zip, zip + n, zip_in, discard, orig_first, orig_out_first, + expected_first, trash, new_input_descr, guard_size, sentinel_value); } } } diff --git a/test/parallel_api/iterator/input_data_sweep_counting_iter.pass.cpp b/test/parallel_api/iterator/input_data_sweep_counting_iter.pass.cpp index a32522bf590..09f36a742e9 100644 --- a/test/parallel_api/iterator/input_data_sweep_counting_iter.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_counting_iter.pass.cpp @@ -35,16 +35,20 @@ call_wrap_recurse(Policy&& exec, T trash, size_t n, const std::string& type_text { if (TestUtils::has_types_support(exec.queue().get_device())) { + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash - TestUtils::usm_data_transfer copy_out(exec, n); + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); oneapi::dpl::counting_iterator my_counting(0); //counting_iterator wrap_recurse<__recurse, 0, /*__read =*/true, /*__reset_read=*/false, /*__write=*/false, /*__check_write=*/false, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/true, - /*__is_reversible=*/true>(std::forward(exec), my_counting, my_counting + n, counting, copy_out.get_data(), - my_counting, copy_out.get_data(), counting, trash, - std::string("counting_iterator<") + type_text + std::string(">")); + /*__is_reversible=*/true>( + std::forward(exec), my_counting, my_counting + n, counting, copy_out.get_data(), my_counting, + copy_out.get_data(), counting, trash, std::string("counting_iterator<") + type_text + std::string(">"), + guard_size, sentinel); } else { diff --git a/test/parallel_api/iterator/input_data_sweep_host_iter.pass.cpp b/test/parallel_api/iterator/input_data_sweep_host_iter.pass.cpp index 7e55477360c..105d94b06ad 100644 --- a/test/parallel_api/iterator/input_data_sweep_host_iter.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_host_iter.pass.cpp @@ -33,15 +33,20 @@ call_wrap_recurse(Policy&& exec, T trash, size_t n, const std::string& type_text { if (TestUtils::has_types_support(exec.queue().get_device())) { - TestUtils::usm_data_transfer copy_out(exec, n); + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash + + TestUtils::usm_data_transfer copy_out(exec, total_size); auto copy_from = oneapi::dpl::counting_iterator(0); // host iterator - std::vector host_iter(n); + std::vector host_iter(total_size); wrap_recurse<__recurse, 0, /*__read =*/true, /*__reset_read=*/true, /*__write=*/true, /*__check_write=*/true, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/false, - /*__is_reversible=*/true>(std::forward(exec), host_iter.begin(), host_iter.end(), copy_from, - copy_out.get_data(), host_iter.begin(), copy_out.get_data(), copy_from, - trash, std::string("host_iterator<") + type_text + std::string(">")); + /*__is_reversible=*/true>( + std::forward(exec), host_iter.begin(), host_iter.begin() + n, copy_from, copy_out.get_data(), + host_iter.begin(), copy_out.get_data(), copy_from, trash, + std::string("host_iterator<") + type_text + std::string(">"), guard_size, sentinel); } else { diff --git a/test/parallel_api/iterator/input_data_sweep_sycl_iter.pass.cpp b/test/parallel_api/iterator/input_data_sweep_sycl_iter.pass.cpp index a10a0f34f9b..c922de8b950 100644 --- a/test/parallel_api/iterator/input_data_sweep_sycl_iter.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_sycl_iter.pass.cpp @@ -33,17 +33,21 @@ call_wrap_recurse(Policy&& exec, T trash, size_t n, const std::string& type_text { if (TestUtils::has_types_support(exec.queue().get_device())) { - TestUtils::usm_data_transfer copy_out(exec, n); + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash + + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); // sycl iterator - sycl::buffer buf(n); + sycl::buffer buf(total_size); //test all modes / wrappers wrap_recurse<__recurse, 0, /*__read =*/true, /*__reset_read=*/true, /*__write=*/true, /*__check_write=*/true, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/true, - /*__is_reversible=*/false>(std::forward(exec), oneapi::dpl::begin(buf), oneapi::dpl::end(buf), counting, - copy_out.get_data(), oneapi::dpl::begin(buf), copy_out.get_data(), - counting, trash, - std::string("sycl_iterator<") + type_text + std::string(">")); + /*__is_reversible=*/false>( + std::forward(exec), oneapi::dpl::begin(buf), oneapi::dpl::begin(buf) + n, counting, + copy_out.get_data(), oneapi::dpl::begin(buf), copy_out.get_data(), counting, trash, + std::string("sycl_iterator<") + type_text + std::string(">"), guard_size, sentinel); } else { diff --git a/test/parallel_api/iterator/input_data_sweep_usm_allocator.pass.cpp b/test/parallel_api/iterator/input_data_sweep_usm_allocator.pass.cpp index 8e81057225c..41ddeed3bc0 100644 --- a/test/parallel_api/iterator/input_data_sweep_usm_allocator.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_usm_allocator.pass.cpp @@ -33,12 +33,16 @@ test_usm_shared_alloc(Policy&& exec, T trash, size_t n, const std::string& type_ { if (TestUtils::has_types_support(exec.queue().get_device())) { + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash + //std::vector using usm shared allocator - TestUtils::usm_data_transfer copy_out(exec, n); + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); // usm_shared allocator std::vector sycl::usm_allocator q_alloc{exec}; - std::vector shared_data_vec(n, q_alloc); + std::vector shared_data_vec(total_size, q_alloc); //test all modes / wrappers //Only test as source iterator for permutation iterator if we can expect it to work @@ -48,9 +52,10 @@ test_usm_shared_alloc(Policy&& exec, T trash, size_t n, const std::string& type_ /*__check_write=*/true, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/ TestUtils::__vector_impl_distinguishes_usm_allocator_from_default_v, - /*__is_reversible=*/true>(std::forward(exec), shared_data_vec.begin(), shared_data_vec.end(), counting, - copy_out.get_data(), shared_data_vec.begin(), copy_out.get_data(), counting, - trash, std::string("usm_shared_alloc_vector<") + type_text + std::string(">")); + /*__is_reversible=*/true>( + std::forward(exec), shared_data_vec.begin(), shared_data_vec.begin() + n, counting, + copy_out.get_data(), shared_data_vec.begin(), copy_out.get_data(), counting, trash, + std::string("usm_shared_alloc_vector<") + type_text + std::string(">"), guard_size, sentinel); } else { @@ -64,12 +69,16 @@ test_usm_host_alloc(Policy&& exec, T trash, size_t n, const std::string& type_te { if (TestUtils::has_types_support(exec.queue().get_device())) { + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash + //std::vector using usm host allocator - TestUtils::usm_data_transfer copy_out(exec, n); + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); // usm_host allocator std::vector sycl::usm_allocator q_alloc{exec}; - std::vector host_data_vec(n, q_alloc); + std::vector host_data_vec(total_size, q_alloc); //test all modes / wrappers //Only test as source iterator for permutation iterator if we can expect it to work @@ -78,9 +87,10 @@ test_usm_host_alloc(Policy&& exec, T trash, size_t n, const std::string& type_te __recurse, 0, /*__read =*/true, /*__reset_read=*/true, /*__write=*/true, /*__check_write=*/true, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/ TestUtils::__vector_impl_distinguishes_usm_allocator_from_default_v, - /*__is_reversible=*/true>(std::forward(exec), host_data_vec.begin(), host_data_vec.end(), counting, copy_out.get_data(), - host_data_vec.begin(), copy_out.get_data(), counting, trash, - std::string("usm_host_alloc_vector<") + type_text + std::string(">")); + /*__is_reversible=*/true>( + std::forward(exec), host_data_vec.begin(), host_data_vec.begin() + n, counting, copy_out.get_data(), + host_data_vec.begin(), copy_out.get_data(), counting, trash, + std::string("usm_host_alloc_vector<") + type_text + std::string(">"), guard_size, sentinel); } else { diff --git a/test/parallel_api/iterator/input_data_sweep_usm_device.pass.cpp b/test/parallel_api/iterator/input_data_sweep_usm_device.pass.cpp index b8927cc4ad3..b0f2c9bbb4c 100644 --- a/test/parallel_api/iterator/input_data_sweep_usm_device.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_usm_device.pass.cpp @@ -33,15 +33,19 @@ call_wrap_recurse(Policy&& exec, T trash, size_t n, const std::string& type_text { if (TestUtils::has_types_support(exec.queue().get_device())) { - TestUtils::usm_data_transfer copy_out(exec, n); + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash + + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); // usm_device - TestUtils::usm_data_transfer device_data(exec, n); + TestUtils::usm_data_transfer device_data(exec, total_size); auto usm_device = device_data.get_data(); //test all modes / wrappers - wrap_recurse<__recurse, 0>(std::forward(exec), usm_device, usm_device + n, counting, copy_out.get_data(), usm_device, - copy_out.get_data(), counting, trash, - std::string("usm_device<") + type_text + std::string(">")); + wrap_recurse<__recurse, 0>(std::forward(exec), usm_device, usm_device + n, counting, + copy_out.get_data(), usm_device, copy_out.get_data(), counting, trash, + std::string("usm_device<") + type_text + std::string(">"), guard_size, sentinel); } else { diff --git a/test/parallel_api/iterator/input_data_sweep_usm_shared.pass.cpp b/test/parallel_api/iterator/input_data_sweep_usm_shared.pass.cpp index 13a6aa2cfd4..6b458a8a715 100644 --- a/test/parallel_api/iterator/input_data_sweep_usm_shared.pass.cpp +++ b/test/parallel_api/iterator/input_data_sweep_usm_shared.pass.cpp @@ -33,17 +33,20 @@ call_wrap_recurse(Policy&& exec, T trash, size_t n, const std::string& type_text { if (TestUtils::has_types_support(exec.queue().get_device())) { + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const T sentinel = static_cast(-999); // Distinct from trash { //usm shared ptr - TestUtils::usm_data_transfer copy_out(exec, n); + TestUtils::usm_data_transfer copy_out(exec, total_size); oneapi::dpl::counting_iterator counting(0); // usm_shared - TestUtils::usm_data_transfer shared_data(exec, n); + TestUtils::usm_data_transfer shared_data(exec, total_size); auto usm_shared = shared_data.get_data(); //test all modes / wrappers wrap_recurse<__recurse, 0>(std::forward(exec), usm_shared, usm_shared + n, counting, copy_out.get_data(), usm_shared, copy_out.get_data(), counting, trash, - std::string("usm_shared<") + type_text + std::string(">")); + std::string("usm_shared<") + type_text + std::string(">"), guard_size, sentinel); } } else @@ -67,16 +70,21 @@ test_impl(Policy&& exec) call_wrap_recurse(CLONE_TEST_POLICY_IDX(exec, 3), -666, n, "int32_t"); // special case: recurse once on perm(perm(usm_shared,count), count) + constexpr size_t guard_size = 5; + const size_t total_size = n + guard_size; + const int sentinel = static_cast(-999); + oneapi::dpl::counting_iterator counting(0); - TestUtils::usm_data_transfer copy_out(exec, n); - TestUtils::usm_data_transfer input(exec, n); + TestUtils::usm_data_transfer copy_out(exec, total_size); + TestUtils::usm_data_transfer input(exec, total_size); auto perm1 = oneapi::dpl::make_permutation_iterator(input.get_data(), counting); auto perm2 = oneapi::dpl::make_permutation_iterator(perm1, counting); wrap_recurse<1, 0, /*__read =*/false, /*__reset_read=*/false, /*__write=*/true, /*__check_write=*/false, /*__usable_as_perm_map=*/true, /*__usable_as_perm_src=*/true, /*__is_reversible=*/true>( - CLONE_TEST_POLICY_IDX(exec, 4), perm2, perm2 + n, counting, copy_out.get_data(), perm2, copy_out.get_data(), counting, -666, - "permutation_iter(permutation_iterator(usm_shared,counting_iterator),counting_iterator)"); + CLONE_TEST_POLICY_IDX(exec, 4), perm2, perm2 + n, counting, copy_out.get_data(), perm2, copy_out.get_data(), + counting, -666, "permutation_iter(permutation_iterator(usm_shared,counting_iterator),counting_iterator)", + guard_size, sentinel); } #endif //TEST_DPCPP_BACKEND_PRESENT