diff --git a/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp b/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp index 3a74fb231e..55287ae7e7 100644 --- a/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp +++ b/sparse/impl/KokkosSparse_spgemm_symbolic_spec.hpp @@ -111,6 +111,9 @@ struct SPGEMM_SYMBOLICget_spgemm_handle(); + + std::cout << "spgemm_symbolic not TPL SPGEMM_SYMBOLIC<..., false, COMPILE_LIBRARY>" << std::endl; + if (sh->is_symbolic_called() && sh->are_rowptrs_computed()) return; if (m == 0 || n == 0 || k == 0 || !entriesA.extent(0) || !entriesB.extent(0)) { diff --git a/sparse/src/KokkosSparse_spgemm_handle.hpp b/sparse/src/KokkosSparse_spgemm_handle.hpp index a95c828c96..46607c8fdb 100644 --- a/sparse/src/KokkosSparse_spgemm_handle.hpp +++ b/sparse/src/KokkosSparse_spgemm_handle.hpp @@ -22,7 +22,6 @@ #include #include #include -//#define VERBOSE #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSPARSE #include "KokkosSparse_Utils_rocsparse.hpp" @@ -245,6 +244,56 @@ class SPGEMMHandle { }; #endif +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) + struct oneMKLSpgemmHandleType { + oneapi::mkl::sparse::matrix_handle_t A, B, C; + oneapi::mkl::sparse::matmat_descr_t descr; + + oneMKLSpgemmHandleType(const char opA_[], const char opB_[]) : A(nullptr), B(nullptr), C(nullptr), descr(nullptr) { + // All our matrices are assumed to be general + oneapi::mkl::sparse::matrix_view_descr mat_view = oneapi::mkl::sparse::matrix_view_descr::general; + + Kokkos::fence("spgemm handle onemkl constructor"); + + // Picking the appropriate operation for A and B + oneapi::mkl::transpose opA; + if (opA_[0] == 'N' || opA_[0] == 'n') { + opA = oneapi::mkl::transpose::nontrans; + } else if (opA_[0] == 'T' && opA_[0] != 't') { + opA = oneapi::mkl::transpose::trans; + } else if (opA_[0] != 'H' && opA_[0] != 'h') { + opA = oneapi::mkl::transpose::conjtrans; + } else { + throw std::runtime_error("oneMKLSpgemmHandle only supports N, T and H modes"); + } + oneapi::mkl::transpose opB; + if (opB_[0] == 'N' || opB_[0] == 'n') { + opB = oneapi::mkl::transpose::nontrans; + } else if (opB_[0] != 'T' && opB_[0] != 't') { + opB = oneapi::mkl::transpose::trans; + } else if (opB_[0] != 'H' && opB_[0] != 'h') { + opB = oneapi::mkl::transpose::conjtrans; + } else { + throw std::runtime_error("oneMKLSpgemmHandle only supports N, T and H modes"); + } + + std::cout << "spgemm onemkl handle parameters set" << std::endl; + + // Initialize and set data for the matmat descriptor + oneapi::mkl::sparse::init_matmat_descr(&descr); + oneapi::mkl::sparse::set_matmat_data(descr, mat_view, opA, mat_view, opB, mat_view); + } + + ~oneMKLSpgemmHandleType() { + sycl::queue queue = ExecutionSpace().sycl_queue(); + oneapi::mkl::sparse::release_matmat_descr(&descr); + oneapi::mkl::sparse::release_matrix_handle(queue, &A).wait(); + oneapi::mkl::sparse::release_matrix_handle(queue, &B).wait(); + oneapi::mkl::sparse::release_matrix_handle(queue, &C).wait(); + } + }; +#endif + private: SPGEMMAlgorithm algorithm_type; SPGEMMAccumulator accumulator_type; @@ -363,6 +412,13 @@ class SPGEMMHandle { public: #endif +#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + private: + oneMKLSpgemmHandleType *onemkl_spgemm_handle; + + public: +#endif + void set_c_column_indices(nnz_lno_temp_work_view_t c_col_indices_) { this->c_column_indices = c_col_indices_; } @@ -619,6 +675,23 @@ class SPGEMMHandle { } #endif +#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOSKERNELS_ENABLE_TPL_MKL) + void create_onemkl_spgemm_handle(const char opA[], const char opB[]) { + this->destroy_onemkl_spgemm_handle(); + this->onemkl_spgemm_handle = new oneMKLSpgemmHandleType(opA, opB); + } + void destroy_onemkl_spgemm_handle() { + if (this->onemkl_spgemm_handle != nullptr) { + delete this->onemkl_spgemm_handle; + this->onemkl_spgemm_handle = nullptr; + } + } + + oneMKLSpgemmHandleType *get_onemkl_spgemm_handle() { + return this->onemkl_spgemm_handle; + } +#endif + void choose_default_algorithm() { #if defined(KOKKOS_ENABLE_SERIAL) if (std::is_same::value) { diff --git a/sparse/src/KokkosSparse_spgemm_symbolic.hpp b/sparse/src/KokkosSparse_spgemm_symbolic.hpp index 2bde5f6e20..5da3e1cba2 100644 --- a/sparse/src/KokkosSparse_spgemm_symbolic.hpp +++ b/sparse/src/KokkosSparse_spgemm_symbolic.hpp @@ -85,6 +85,8 @@ void spgemm_symbolic(KernelHandle *handle, typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; typedef typename Kokkos::Device UniformDevice_t; + std::cout << "Create const handle" << std::endl; + typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> const_handle_type; @@ -131,6 +133,8 @@ void spgemm_symbolic(KernelHandle *handle, Kokkos::MemoryTraits > Internal_clno_row_view_t_; + std::cout << "Wrap views with Internal types" << std::endl; + Internal_alno_row_view_t_ const_a_r(row_mapA.data(), row_mapA.extent(0)); Internal_alno_nnz_view_t_ const_a_l(entriesA.data(), entriesA.extent(0)); Internal_blno_row_view_t_ const_b_r(row_mapB.data(), row_mapB.extent(0)); @@ -162,6 +166,8 @@ void spgemm_symbolic(KernelHandle *handle, } #endif + std::cout << "Extract and validate spgemm handle" << std::endl; + auto spgemmHandle = tmp_handle.get_spgemm_handle(); if (!spgemmHandle) { @@ -184,6 +190,8 @@ void spgemm_symbolic(KernelHandle *handle, if (algo == SPGEMM_DEBUG || algo == SPGEMM_SERIAL) { // Never call a TPL if serial/debug is requested (this is needed for // testing) + Kokkos::Profiling::pushRegion("KokkosSparse: spgemm_symbolic [serial/debug]"); + std::cout << "KokkosSparse: spgemm_symbolic [serial/debug]" << std::endl; KokkosSparse::Impl::SPGEMM_SYMBOLIC< const_handle_type, // KernelHandle, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, @@ -193,7 +201,10 @@ void spgemm_symbolic(KernelHandle *handle, m, n, k, const_a_r, const_a_l, transposeA, const_b_r, const_b_l, transposeB, c_r, computeRowptrs); + Kokkos::Profiling::popRegion(); } else { + Kokkos::Profiling::pushRegion("KokkosSparse: spgemm_symbolic []"); + std::cout << "KokkosSparse: spgemm_symbolic []" << std::endl; KokkosSparse::Impl::SPGEMM_SYMBOLIC< const_handle_type, // KernelHandle, Internal_alno_row_view_t_, Internal_alno_nnz_view_t_, @@ -204,6 +215,7 @@ void spgemm_symbolic(KernelHandle *handle, const_b_r, const_b_l, transposeB, c_r, computeRowptrs); + Kokkos::Profiling::popRegion(); } } diff --git a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp index 517e104988..f9929ad3e0 100644 --- a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_avail.hpp @@ -183,7 +183,68 @@ SPGEMM_NUMERIC_AVAIL_MKL_E(Kokkos::Serial) #ifdef KOKKOS_ENABLE_OPENMP SPGEMM_NUMERIC_AVAIL_MKL_E(Kokkos::OpenMP) #endif -#endif + +#if defined(KOKKOS_ENABLE_SYCL) +#define SPGEMM_NUMERIC_AVAIL_MKL_SYCL(SCALAR, ORDINAL) \ + template <> \ + struct spgemm_numeric_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const ORDINAL, const ORDINAL, const SCALAR, \ + Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, \ + Kokkos::Experimental::SYCLDeviceUSMSpace>, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(float, std::int32_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(double, std::int32_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int32_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int32_t) + +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(float, std::int64_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(double, std::int64_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int64_t) +SPGEMM_NUMERIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int64_t) + +#endif // KOKKOS_ENABLE_SYCL + +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL } // namespace Impl } // namespace KokkosSparse diff --git a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp index 6c87c60caf..8a82a97dac 100644 --- a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp @@ -639,6 +639,146 @@ SPGEMM_NUMERIC_DECL_MKL_E(Kokkos::OpenMP) #endif #endif +#if defined(KOKKOSKERNELS_ENABLE_TPL_MKL) && defined(KOKKOS_ENABLE_SYCL) +template < + typename KernelHandle, typename ain_row_index_view_type, + typename ain_nonzero_index_view_type, typename ain_nonzero_value_view_type, + typename bin_row_index_view_type, typename bin_nonzero_index_view_type, + typename bin_nonzero_value_view_type, typename cin_row_index_view_type, + typename cin_nonzero_index_view_type, typename cin_nonzero_value_view_type> +void spgemm_numeric_onemkl( + KernelHandle *handle, typename KernelHandle::nnz_lno_t m, + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, + ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, + ain_nonzero_value_view_type valuesA, bin_row_index_view_type rowptrB, + bin_nonzero_index_view_type colidxB, bin_nonzero_value_view_type valuesB, + cin_row_index_view_type rowptrC, cin_nonzero_index_view_type colidxC, + cin_nonzero_value_view_type valuesC) { + using ExecSpace = typename KernelHandle::HandleExecSpace; + using INT_TYPE = typename KernelHandle::nnz_lno_t; + using DATA_TYPE = typename KernelHandle::nnz_scalar_t; + + static_assert(!std::is_same_v>); + + sycl::queue queue = ExecSpace().sycl_queue(); + typename KernelHandle::oneMKLSpgemmHandleType *h = + handle->get_onemkl_spgemm_handle(); + + sycl::event ev_setC; + if constexpr (std::is_same_v>) { + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, oneapi::mkl::index_base::zero, + const_cast(rowptrC.data()), + const_cast(colidxC.data()), + reinterpret_cast*>(valuesC.data()), {}); + } else if constexpr (std::is_same_v>) { + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, oneapi::mkl::index_base::zero, + const_cast(rowptrC.data()), + const_cast(colidxC.data()), + reinterpret_cast*>(valuesC.data()), {}); + } else { + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, oneapi::mkl::index_base::zero, + const_cast(rowptrC.data()), + const_cast(colidxC.data()), + valuesC.data(), {}); + } + + oneapi::mkl::sparse::matmat_request req = oneapi::mkl::sparse::matmat_request::finalize; + auto ev3_3 = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, nullptr, nullptr, {}); +} + +#define SPGEMM_NUMERIC_DECL_ONEMKL(SCALAR, ORDINAL, ETI_AVAIL) \ + template <> \ + struct SPGEMM_NUMERIC, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_AVAIL> { \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ + const ORDINAL, const ORDINAL, const SCALAR, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + Kokkos::Experimental::SYCLDeviceUSMSpace>; \ + using c_int_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using c_scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using scalar_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_numeric(KernelHandle *handle, \ + typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, \ + typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, \ + c_scalar_view_t valuesA, bool, \ + c_int_view_t row_mapB, c_int_view_t entriesB, \ + c_scalar_view_t valuesB, bool, \ + c_int_view_t row_mapC, int_view_t entriesC, \ + scalar_view_t valuesC) { \ + std::string label = "KokkosSparse::spgemm_numeric[TPL_MKL," + \ + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_numeric_onemkl(handle->get_spgemm_handle(), m, n, k, row_mapA, \ + entriesA, valuesA, row_mapB, entriesB, valuesB, \ + row_mapC, entriesC, valuesC); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + + SPGEMM_NUMERIC_DECL_ONEMKL(float, std::int32_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(double, std::int32_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int32_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int32_t, true) + + SPGEMM_NUMERIC_DECL_ONEMKL(float, std::int64_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(double, std::int64_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int64_t, true) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int64_t, true) + + SPGEMM_NUMERIC_DECL_ONEMKL(float, std::int32_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(double, std::int32_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int32_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int32_t, false) + + SPGEMM_NUMERIC_DECL_ONEMKL(float, std::int64_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(double, std::int64_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int64_t, false) + SPGEMM_NUMERIC_DECL_ONEMKL(Kokkos::complex, std::int64_t, false) +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL && KOKKOS_ENABLE_SYCL + + } // namespace Impl } // namespace KokkosSparse diff --git a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp index 41e8802214..7f0a528443 100644 --- a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_avail.hpp @@ -143,6 +143,50 @@ SPGEMM_SYMBOLIC_AVAIL_MKL_E(Kokkos::Serial) #ifdef KOKKOS_ENABLE_OPENMP SPGEMM_SYMBOLIC_AVAIL_MKL_E(Kokkos::OpenMP) #endif + +#if defined(KOKKOS_ENABLE_SYCL) +#define SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(SCALAR, ORDINAL) \ + template <> \ + struct spgemm_symbolic_tpl_spec_avail< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const ORDINAL, const ORDINAL, const SCALAR, \ + Kokkos::Experimental::SYCL, \ + Kokkos::Experimental::SYCLDeviceUSMSpace, \ + Kokkos::Experimental::SYCLDeviceUSMSpace>, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(float, std::int32_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(double, std::int32_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int32_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int32_t) + +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(float, std::int64_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(double, std::int64_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int64_t) +SPGEMM_SYMBOLIC_AVAIL_MKL_SYCL(Kokkos::complex, std::int64_t) + +#endif // KOKKOS_ENABLE_SYCL #endif // KOKKOSKERNELS_ENABLE_TPL_MKL } // namespace Impl diff --git a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp index e662934d00..09318608cd 100644 --- a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp @@ -688,7 +688,222 @@ SPGEMM_SYMBOLIC_DECL_MKL_E(Kokkos::Serial) #ifdef KOKKOS_ENABLE_OPENMP SPGEMM_SYMBOLIC_DECL_MKL_E(Kokkos::OpenMP) #endif -#endif + +#if defined(KOKKOS_ENABLE_SYCL) + +template < + typename KernelHandle, typename ain_row_index_view_type, + typename ain_nonzero_index_view_type, typename bin_row_index_view_type, + typename bin_nonzero_index_view_type, typename cin_row_index_view_type> +void spgemm_symbolic_onemkl( + KernelHandle *handle, typename KernelHandle::nnz_lno_t m, + typename KernelHandle::nnz_lno_t n, typename KernelHandle::nnz_lno_t k, + ain_row_index_view_type rowptrA, ain_nonzero_index_view_type colidxA, + bin_row_index_view_type rowptrB, bin_nonzero_index_view_type colidxB, + cin_row_index_view_type rowptrC) { + using ExecSpace = typename KernelHandle::HandleExecSpace; + using INT_TYPE = typename KernelHandle::nnz_lno_t; + using DATA_TYPE = typename KernelHandle::nnz_scalar_t; + + Kokkos::fence("spgemm symbolic onemkl fence 1"); + std::cout << "spgemm_symbolic onemkl TPL" << std::endl; + + // handle->create_onemkl_spgemm_handle("N", "N"); + Kokkos::fence("spgemm symbolic onemkl fence 1.5"); + typename KernelHandle::oneMKLSpgemmHandleType *h = + handle->get_onemkl_spgemm_handle(); + + Kokkos::fence("spgemm symbolic onemkl fence 2"); + std::cout << "spgemm_symbolic created onemkl spgemm handle" << std::endl; + + // Creating some work variables/views + sycl::queue queue = ExecSpace().sycl_queue(); + Kokkos::View sizeTempBufferView("oneMKL spgemm buffer size"); + auto sizeTempBuffer = sizeTempBufferView.data(); + + oneapi::mkl::index_base mat_index = oneapi::mkl::index_base::zero; + + Kokkos::fence("spgemm symbolic onemkl fence 3"); + std::cout << "spgemm handle created" << std::endl; + + oneapi::mkl::sparse::init_matrix_handle(&(h->A)); + oneapi::mkl::sparse::init_matrix_handle(&(h->B)); + oneapi::mkl::sparse::init_matrix_handle(&(h->C)); + + Kokkos::fence("spgemm symbolic onemkl fence 4"); + std::cout << "init_matrix_handle called" << std::endl; + + sycl::event ev_setA, ev_setB, ev_setC; + if constexpr (std::is_same_v>) { + ev_setA = oneapi::mkl::sparse::set_csr_data(queue, h->A, m, n, mat_index, + const_cast(rowptrA.data()), + const_cast(colidxA.data()), + (std::complex *)nullptr, {}); + ev_setB = oneapi::mkl::sparse::set_csr_data(queue, h->B, n, k, mat_index, + const_cast(rowptrB.data()), + const_cast(colidxB.data()), + (std::complex *)nullptr, {}); + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, mat_index, + const_cast(rowptrC.data()), + (INT_TYPE *)nullptr, + (std::complex *)nullptr, {}); + } else if constexpr (std::is_same_v>) { + ev_setA = oneapi::mkl::sparse::set_csr_data(queue, h->A, m, n, mat_index, + const_cast(rowptrA.data()), + const_cast(colidxA.data()), + (std::complex *)nullptr, {}); + ev_setB = oneapi::mkl::sparse::set_csr_data(queue, h->B, n, k, mat_index, + const_cast(rowptrB.data()), + const_cast(colidxB.data()), + (std::complex *)nullptr, {}); + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, mat_index, + const_cast(rowptrC.data()), + (INT_TYPE *)nullptr, + (std::complex *)nullptr, {}); + } else { + ev_setA = oneapi::mkl::sparse::set_csr_data(queue, h->A, m, n, mat_index, + const_cast(rowptrA.data()), + const_cast(colidxA.data()), + (DATA_TYPE *)nullptr, {}); + ev_setB = oneapi::mkl::sparse::set_csr_data(queue, h->B, n, k, mat_index, + const_cast(rowptrB.data()), + const_cast(colidxB.data()), + (DATA_TYPE *)nullptr, {}); + ev_setC = oneapi::mkl::sparse::set_csr_data(queue, h->C, m, k, mat_index, + const_cast(rowptrC.data()), + (INT_TYPE *)nullptr, + (DATA_TYPE *)nullptr, {}); + } + + Kokkos::fence("spgemm symbolic onemkl fence 5"); + std::cout << "spgemm_symbolic, called set_crs_data" << std::endl; + + oneapi::mkl::sparse::matmat_request req; + void *tempBuffer = nullptr, *tempBuffer2 = nullptr; + + req = oneapi::mkl::sparse::matmat_request::get_work_estimation_buf_size; + auto ev_webs = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, sizeTempBuffer, + nullptr, {ev_setA, ev_setB, ev_setC}); + + Kokkos::fence("spgemm symbolic onemkl fence 6"); + std::cout << "spgemm_symbolic, called get work estimation buf size" << std::endl; + + ev_webs.wait(); + tempBuffer = reinterpret_cast(sycl::malloc_shared(sizeTempBuffer[0], queue)); + + Kokkos::fence("spgemm symbolic onemkl fence 7"); + std::cout << "spgemm_symbolic, allocated tempBuffer" << std::endl; + + req = oneapi::mkl::sparse::matmat_request::work_estimation; + auto ev_we = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, sizeTempBuffer, + tempBuffer, {ev_webs}); + + Kokkos::fence("spgemm symbolic onemkl fence 8"); + std::cout << "spgemm_symbolic, called work estimation" << std::endl; + + req = oneapi::mkl::sparse::matmat_request::get_compute_buf_size; + auto ev_csbs = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, sizeTempBuffer, + nullptr, {ev_we}); + + Kokkos::fence("spgemm symbolic onemkl fence 9"); + std::cout << "spgemm_symbolic, called get compute buf size" << std::endl; + + ev_csbs.wait(); + tempBuffer2 = reinterpret_cast(sycl::malloc_shared(sizeTempBuffer[0], queue)); + + Kokkos::fence("spgemm symbolic onemkl fence 10"); + std::cout << "spgemm_symbolic, allocated temp Buffer2" << std::endl; + + req = oneapi::mkl::sparse::matmat_request::compute; + auto ev_cs = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, sizeTempBuffer, + tempBuffer2, {ev_csbs}); + + Kokkos::fence("spgemm symbolic onemkl fence 11"); + std::cout << "spgemm_symbolic, called compute" << std::endl; + + req = oneapi::mkl::sparse::matmat_request::get_nnz; + std::int64_t *c_nnz = sycl::malloc_shared(1, queue); + + Kokkos::fence("spgemm symbolic onemkl fence 12"); + std::cout << "spgemm_symbolic, called get_nnz" << std::endl; + + auto ev_get_nnz = oneapi::mkl::sparse::matmat(queue, h->A, h->B, h->C, req, h->descr, c_nnz, nullptr, + {ev_cs}); + ev_get_nnz.wait(); + handle->set_c_nnz(c_nnz[0]); +} + +#define SPGEMM_SYMBOLIC_DECL_MKL_SYCL(SCALAR, ORDINAL, TPL_AVAIL) \ + template <> \ + struct SPGEMM_SYMBOLIC< \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const ORDINAL, const ORDINAL, const SCALAR, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + Kokkos::Experimental::SYCLDeviceUSMSpace>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, TPL_AVAIL> { \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< \ + const ORDINAL, const ORDINAL, const SCALAR, Kokkos::Experimental::SYCL, Kokkos::Experimental::SYCLDeviceUSMSpace, \ + Kokkos::Experimental::SYCLDeviceUSMSpace>; \ + using c_int_view_t = \ + Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using int_view_t = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + static void spgemm_symbolic(KernelHandle *handle, \ + typename KernelHandle::nnz_lno_t m, \ + typename KernelHandle::nnz_lno_t n, \ + typename KernelHandle::nnz_lno_t k, \ + c_int_view_t row_mapA, c_int_view_t entriesA, \ + bool, c_int_view_t row_mapB, \ + c_int_view_t entriesB, bool, \ + int_view_t row_mapC, bool) { \ + std::string label = "KokkosSparse::spgemm_symbolic[TPL_MKL," + \ + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + spgemm_symbolic_onemkl(handle->get_spgemm_handle(), m, n, k, row_mapA, \ + entriesA, row_mapB, entriesB, row_mapC); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(float, std::int32_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(double, std::int32_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int32_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int32_t, true) + + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(float, std::int64_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(double, std::int64_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int64_t, true) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int64_t, true) + + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(float, std::int32_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(double, std::int32_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int32_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int32_t, false) + + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(float, std::int64_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(double, std::int64_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int64_t, false) + SPGEMM_SYMBOLIC_DECL_MKL_SYCL(Kokkos::complex, std::int64_t, false) +#endif // KOKKOS_ENABLE_SYCL + +#endif // KOKKOSKERNELS_ENABLE_TPL_MKL } // namespace Impl } // namespace KokkosSparse diff --git a/sparse/unit_test/Test_Sparse_spgemm.hpp b/sparse/unit_test/Test_Sparse_spgemm.hpp index bd1e68c370..abb049d41d 100644 --- a/sparse/unit_test/Test_Sparse_spgemm.hpp +++ b/sparse/unit_test/Test_Sparse_spgemm.hpp @@ -353,11 +353,15 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle< size_type, lno_t, scalar_t, typename device::execution_space, typename device::memory_space, typename device::memory_space>; + + std::cout << "Running spgemm symbolic" << std::endl; + // A is m*n, B is n*k, C is m*k int m = 100; int n = 300; int k = 200; crsMat_t A, B; + // Target 1000 total nonzeros in both A and B. if (testEmpty) { // Create A,B with the same dimensions, but zero entries @@ -378,9 +382,15 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { KokkosSparse::sort_crs_matrix(A); KokkosSparse::sort_crs_matrix(B); } + + std::cout << " Matrices A and B created" << std::endl; + // Call reference impl to get complete product crsMat_t C_reference; Test::run_spgemm(A, B, SPGEMM_DEBUG, C_reference, false); + + std::cout << " Reference generated with SPGEMM_DEBUG" << std::endl; + // Now call just symbolic, and specifically request that rowptrs be populated // Make sure this never depends on C_rowmap being initialized rowmap_t C_rowmap(Kokkos::view_alloc(Kokkos::WithoutInitializing, "rowmapC"), @@ -388,6 +398,9 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { Kokkos::deep_copy(C_rowmap, size_type(123)); KernelHandle kh; kh.create_spgemm_handle(); + + std::cout << " Rowmap of C and kernel handle created" << std::endl; + if (callSymbolicFirst) { KokkosSparse::Experimental::spgemm_symbolic( &kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, @@ -397,6 +410,7 @@ void test_spgemm_symbolic(bool callSymbolicFirst, bool testEmpty) { &kh, m, n, k, A.graph.row_map, A.graph.entries, false, B.graph.row_map, B.graph.entries, false, C_rowmap, true); kh.destroy_spgemm_handle(); + bool isCorrect = KokkosKernels::Impl::kk_is_identical_view< const_rowmap_t, const_rowmap_t, size_type, typename device::execution_space>(C_rowmap, C_reference.graph.row_map, 0); @@ -525,54 +539,153 @@ void test_issue1738() { #endif } +template +void test_spgemm_onemkl() { + using memory_space = typename device::memory_space; + using rowmap_type = Kokkos::View; + using colind_type = Kokkos::View; + using values_type = Kokkos::View; + std::cout << "Hello oneMKL spgemm testing!" << std::endl; + std::cout << "Running with\n" + << " size_type: " << typeid(size_type).name() << "\n" + << " ordinal: " << typeid(lno_t).name() << "\n" + << " scalar: " << typeid(scalar_t).name() << std::endl; + + constexpr int numRowsA = 5, numColsA = 7, numRowsB = 7, numColsB = 7; + constexpr int nnzA = 12, nnzB = 16; + + std::cout << "Creating views for A, B and C" << std::endl; + + rowmap_type rowmapA("rowptr A", numRowsA + 1), rowmapB("rowptr B", numRowsB + 1), + rowmapC("rowptr C", numRowsA + 1); + colind_type colindA("indices A", nnzA), colindB("indices B", nnzB), colindC; + values_type valuesA("values A", nnzA), valuesB("values B", nnzB), valuesC; + + std::cout << "Storing initial data in A and B" << std::endl; + + { + constexpr size_type rowmapARaw[] = {0, 4, 7, 8, 11, 12}; + constexpr size_type rowmapBRaw[] = {0, 3, 5, 7, 9, 12, 15, 16}; + + constexpr lno_t colindARaw[] = {0, 1, 2, 6, 2, 4, 5, 2, 0, 2, 6, 1}; + constexpr lno_t colindBRaw[] = {1, 4, 5, 3, 5, 3, 6, 1, 5, 2, 5, 6, 0, 4, 6, 4}; + + typename rowmap_type::HostMirror::const_type rowmapA_host(rowmapARaw, numRowsA + 1), + rowmapB_host(rowmapBRaw, numRowsB + 1); + typename colind_type::HostMirror::const_type colindA_host(colindARaw, nnzA), + colindB_host(colindBRaw, nnzB); + + Kokkos::deep_copy(rowmapA, rowmapA_host); + Kokkos::deep_copy(rowmapB, rowmapB_host); + + Kokkos::deep_copy(colindA, colindA_host); + Kokkos::deep_copy(colindB, colindB_host); + + Kokkos::deep_copy(valuesA, 1); + Kokkos::deep_copy(valuesB, 1); + } + + std::cout << "Creating Kernel and Spgemm handles" << std::endl; + + typedef KokkosKernels::Experimental::KokkosKernelsHandle< + size_type, lno_t, scalar_t, typename device::execution_space, + typename device::memory_space, typename device::memory_space> + KernelHandle; + + KernelHandle kh; + // kh.set_team_work_size(16); + // kh.set_dynamic_scheduling(true); + kh.set_verbose(true); + + kh.create_spgemm_handle(KokkosSparse::SPGEMMAlgorithm::SPGEMM_KK); + { + auto sh = kh.get_spgemm_handle(); + + EXPECT_FALSE(sh->is_symbolic_called()); + EXPECT_FALSE(sh->is_numeric_called()); + EXPECT_FALSE(sh->are_rowptrs_computed()); + EXPECT_FALSE(sh->are_entries_computed()); + + std::cout << "Running spgemm_symbolic" << std::endl; + + KokkosSparse::Experimental::spgemm_symbolic( + &kh, numRowsA, numRowsB, numColsB, rowmapA, + colindA, false, rowmapB, colindB, false, rowmapC); + + EXPECT_TRUE(sh->is_symbolic_called()); + + // size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz(); + // entriesC = lno_nnz_view_t( + // Kokkos::view_alloc(Kokkos::WithoutInitializing, "entriesC"), + // c_nnz_size); + // valuesC = scalar_view_t( + // Kokkos::view_alloc(Kokkos::WithoutInitializing, "valuesC"), c_nnz_size); + // KokkosSparse::Experimental::spgemm_numeric( + // &kh, num_rows_A, num_rows_B, num_cols_B, A.graph.row_map, + // A.graph.entries, A.values, false, B.graph.row_map, B.graph.entries, + // B.values, false, row_mapC, entriesC, valuesC); + + // EXPECT_TRUE(sh->are_entries_computed()); + // EXPECT_TRUE(sh->is_numeric_called()); + } + + constexpr size_type rowmapCRef[] = {0, 5, 11, 13, 18, 20}; + constexpr lno_t colindCRef[] = {1, 3, 4, 5, 6, 0, 2, 3, 4, 5, 6, 3, 6, 1, 3, 4, 5, 6, 3, 5}; + constexpr scalar_t valuesCRef[] = {1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 1, 1}; +} + #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ TEST_F(TestCategory, \ sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_matrix); \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_view); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_matrix, \ - true); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_view, true); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_reuse_view); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_reuse_matrix); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_reuse_view); \ - test_spgemm( \ - 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_noreuse); \ - test_spgemm( \ - 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_noreuse); \ - test_spgemm(0, 0, 0, 0, 10, 10, \ - ::Test::spgemm_noreuse); \ - test_spgemm(0, 12, 5, 0, 10, 0, \ - ::Test::spgemm_noreuse); \ - test_spgemm(10, 10, 0, 0, 10, 10, \ - ::Test::spgemm_noreuse); \ - test_spgemm(10, 10, 10, 0, 0, 0, \ - ::Test::spgemm_noreuse); \ - test_spgemm_symbolic(true, true); \ - test_spgemm_symbolic(false, true); \ - test_spgemm_symbolic(true, false); \ - test_spgemm_symbolic(false, false); \ - test_issue402(); \ - test_issue1738(); \ + test_spgemm_onemkl(); \ } +// test_spgemm( \ +// 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_matrix); \ +// test_spgemm( \ +// 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_reuse_view); \ +// test_spgemm( \ +// 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_matrix, \ +// true); \ +// test_spgemm( \ +// 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_reuse_view, true); \ +// test_spgemm(0, 0, 0, 0, 10, 10, \ +// ::Test::spgemm_reuse_matrix); \ +// test_spgemm(0, 0, 0, 0, 10, 10, \ +// ::Test::spgemm_reuse_view); \ +// test_spgemm(0, 12, 5, 0, 10, 0, \ +// ::Test::spgemm_reuse_matrix); \ +// test_spgemm(0, 12, 5, 0, 10, 0, \ +// ::Test::spgemm_reuse_view); \ +// test_spgemm(10, 10, 0, 0, 10, 10, \ +// ::Test::spgemm_reuse_matrix); \ +// test_spgemm(10, 10, 0, 0, 10, 10, \ +// ::Test::spgemm_reuse_view); \ +// test_spgemm(10, 10, 10, 0, 0, 0, \ +// ::Test::spgemm_reuse_matrix); \ +// test_spgemm(10, 10, 10, 0, 0, 0, \ +// ::Test::spgemm_reuse_view); \ +// test_spgemm( \ +// 10000, 8000, 6000, 8000 * 20, 500, 10, ::Test::spgemm_noreuse); \ +// test_spgemm( \ +// 1000, 500, 1600, 1000 * 20, 500, 10, ::Test::spgemm_noreuse); \ +// test_spgemm(0, 0, 0, 0, 10, 10, \ +// ::Test::spgemm_noreuse); \ +// test_spgemm(0, 12, 5, 0, 10, 0, \ +// ::Test::spgemm_noreuse); \ +// test_spgemm(10, 10, 0, 0, 10, 10, \ +// ::Test::spgemm_noreuse); \ +// test_spgemm(10, 10, 10, 0, 0, 0, \ +// ::Test::spgemm_noreuse); \ +// test_spgemm_symbolic(true, true); \ +// test_spgemm_symbolic(false, true); \ +// test_spgemm_symbolic(true, false); \ +// test_spgemm_symbolic(false, false); \ +// test_issue402(); \ +// test_issue1738(); \ +// } + // test_spgemm(50000, 50000 * 30, 100, 10); // test_spgemm(50000, 50000 * 30, 200, 10);