diff --git a/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp index 9745dd1bd5..dac19fb7fe 100644 --- a/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp @@ -91,8 +91,8 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRig #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS #define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT) \ - template \ - struct gemv_tpl_spec_avail \ + struct gemv_tpl_spec_avail, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ diff --git a/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index 1351da1ed2..825b2cb739 100644 --- a/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -409,9 +409,9 @@ namespace Impl { } #define KOKKOSBLAS2_DGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ + template <> \ struct GEMV< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -429,7 +429,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + static void gemv(const Kokkos::HIP& space, const char trans[], typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ const YViewType& Y) { \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,double]"); \ @@ -444,9 +444,9 @@ namespace Impl { }; #define KOKKOSBLAS2_SGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ + template <> \ struct GEMV< \ - ExecSpace, \ + Kokkos::HIP, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ Kokkos::View, \ @@ -464,7 +464,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + static void gemv(const Kokkos::HIP& space, const char trans[], typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ const YViewType& Y) { \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,float]"); \ @@ -479,8 +479,8 @@ namespace Impl { }; #define KOKKOSBLAS2_ZGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV \ + struct GEMV**, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View*, LAYOUT, Kokkos::Device, \ @@ -499,7 +499,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + static void gemv(const Kokkos::HIP& space, const char trans[], typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ const YViewType& Y) { \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ @@ -518,8 +518,8 @@ namespace Impl { }; #define KOKKOSBLAS2_CGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMV \ + struct GEMV**, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View*, LAYOUT, Kokkos::Device, \ @@ -538,7 +538,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const ExecSpace& space, const char trans[], typename AViewType::const_value_type& alpha, \ + static void gemv(const Kokkos::HIP& space, const char trans[], typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, typename YViewType::const_value_type& beta, \ const YViewType& Y) { \ Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_ROCBLAS,complex]"); \ diff --git a/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp index 8bdd61e619..5f4bdffe0d 100644 --- a/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas3_gemm_tpl_spec_avail.hpp @@ -101,16 +101,16 @@ KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, Kokkos::LayoutRig // rocBLAS #if defined(KOKKOSKERNELS_ENABLE_TPL_ROCBLAS) -#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ - template \ - struct gemm_tpl_spec_avail, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > > { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gemm_tpl_spec_avail, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ }; KOKKOSBLAS3_GEMM_TPL_SPEC_AVAIL_ROCBLAS(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) diff --git a/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp index 67ca399561..e0d52c2c70 100644 --- a/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas3_gemm_tpl_spec_decl.hpp @@ -248,23 +248,23 @@ namespace KokkosBlas { namespace Impl { #define KOKKOSBLAS3_XGEMM_ROCBLAS(SCALAR_TYPE, ROCBLAS_SCALAR_TYPE, ROCBLAS_FN, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template \ - struct GEMM, \ + template <> \ + struct GEMM, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ - Kokkos::View, \ + Kokkos::View, \ Kokkos::MemoryTraits >, \ true, ETI_SPEC_AVAIL> { \ typedef SCALAR_TYPE SCALAR; \ - typedef Kokkos::View, \ + typedef Kokkos::View, \ Kokkos::MemoryTraits > \ AViewType; \ - typedef Kokkos::View, \ + typedef Kokkos::View, \ Kokkos::MemoryTraits > \ BViewType; \ - typedef Kokkos::View, \ + typedef Kokkos::View, \ Kokkos::MemoryTraits > \ CViewType; \ \ @@ -293,7 +293,7 @@ namespace Impl { M * N < numDotsLayoutLeftThreshold) || \ (is_lr && transa != rocblas_operation_none && transb == rocblas_operation_none && \ M * N < numDotsLayoutRightThreshold)) { \ - DotBasedGEMM gemm(alpha, A, B, beta, C); \ + DotBasedGEMM gemm(alpha, A, B, beta, C); \ bool conjT = (std::is_same::value || std::is_same::value) \ ? false \ : (transa == rocblas_operation_conjugate_transpose ? true : false); \ diff --git a/blas/tpls/KokkosBlas_tpl_spec.hpp b/blas/tpls/KokkosBlas_tpl_spec.hpp index 34064b48c1..5c3a16338c 100644 --- a/blas/tpls/KokkosBlas_tpl_spec.hpp +++ b/blas/tpls/KokkosBlas_tpl_spec.hpp @@ -91,6 +91,16 @@ inline cublasOperation_t trans_mode_kk_to_cublas(const char kkMode[]) { return trans; } +/// \brief This function converts KK side mode to cuBLAS side mode +inline cublasSideMode_t side_mode_kk_to_cublas(const char kkMode[]) { + cublasSideMode_t side; + if ((kkMode[0] == 'L') || (kkMode[0] == 'l')) + side = CUBLAS_SIDE_LEFT; + else + side = CUBLAS_SIDE_RIGHT; + return side; +} + } // namespace Impl } // namespace KokkosBlas #endif // KOKKOSKERNELS_ENABLE_TPL_CUBLAS @@ -187,6 +197,19 @@ inline rocblas_operation trans_mode_kk_to_rocblas(const char kkMode[]) { return trans; } +/// \brief This function converts KK side mode to rocBLAS side mode +inline rocblas_side side_mode_kk_to_rocblas(const char kkSide[]) { + rocblas_side side; + if (kkSide[0] == 'L' || kkSide[0] == 'l') { + side = rocblas_side_left; + } else if (kkSide[0] == 'R' || kkSide[0] == 'r') { + side = rocblas_side_right; + } else { + side = rocblas_side_both; + } + return side; +} + } // namespace Impl } // namespace KokkosBlas diff --git a/docs/source/API/lapack-index.rst b/docs/source/API/lapack-index.rst index cdd2900b1a..22d49b04e4 100644 --- a/docs/source/API/lapack-index.rst +++ b/docs/source/API/lapack-index.rst @@ -6,6 +6,7 @@ API: LAPACK :hidden: lapack/geqrf + lapack/gemqr lapack/potrf lapack/gesv lapack/gesvd @@ -72,11 +73,11 @@ Below are tables summarizing the currently supported function calls and third pa - * - geqrf - :doc:`geqrf ` - - - - - - - - - - + - -- + - X + - X + - X + - -- * - potrf - :doc:`potrf ` - -- @@ -84,20 +85,20 @@ Below are tables summarizing the currently supported function calls and third pa - X - X - -- - * - ungqr - - - - - - - - - - - - - * - unmqr + * - {or,un}gqr - - - - - - + * - {or,un}mqr + - :doc:`gemqr ` + - -- + - X + - X + - X + - -- * - gesvd - :doc:`gesvd ` - -- diff --git a/docs/source/API/lapack/gemqr.rst b/docs/source/API/lapack/gemqr.rst new file mode 100644 index 0000000000..2a2685cbfb --- /dev/null +++ b/docs/source/API/lapack/gemqr.rst @@ -0,0 +1,72 @@ +KokkosLapack::gemqr +################### + +Defined in header: :code:`KokkosLapack_gemqr.hpp` + +.. code:: c++ + + template + void gemqr(const ExecutionSpace& space, const char side[], const char trans[], const AMatrix& A, const TauArray& Tau, + const CMatrix& C, const InfoArray& Info); + + + template + void gemqr(const char side[], const char trans[], const AMatrix& A, const TauArray& Tau, const CMatrix& C, + const InfoArray& Info); + +Applies the `Q` factor from the QR factorization of matrix :math:`A` to matrix :math:`C` using the prescribed side and operation + +.. math:: + + C=Q*C\quad\text{or}\quadC=C*Q + +where :math:`A` is a matrix previously factored using a call to ``geqrf`` and :math:`Tau` stores the associated scaling factors. + +1. Overwrites :math:`C` with the :math:`Q*C` using the resources of ``space``. +2. Same as 1. but uses the resources of the default execution space from ``CMatrix::execution_space``. + +The function will throw a runtime exception if the size of :math:`C` is incompatible with that of :math:`Q`. + +Parameters +========== + +:space: execution space instance. + +:side: control parameter specifying on which side the solver is applied, supported values are ``L, l`` for left side and ``R, r`` for right side. + +:trans: control parameter specifying what operation on the entries of :math:`Q` should be performed. Supported values are ``N, n`` for nothing, ``T, t`` for transpose mode and ``C, c`` for conjugate transpose mode. + +:A: The input matrix that contains the :math:`QR` factors from a previous call to ``geqrf``. + +:Tau: rank-1 view of size min(M,N) that contains the scaling factors of the elementary reflectors. + +:C: The matrix to which we multiply the :math:`Q` factor. + +:Info: rank-1 view of integers and of size 1: Info[0] = 0: successful exit; Info[0] < 0: if equal to '-i', the i-th argument had an illegal value. + +Type Requirements +================= + +- `ExecutionSpace` must be a Kokkos `execution space `_ + +- `AMatrix` must be a Kokkos `View `_ of rank 2 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +- `Tau` must be a Kokkos `View `_ of rank 1 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +- `CMatrix` must be a Kokkos `View `_ of rank 2 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +- `Info` must be a Kokkos `View `_ of rank 1 that satisfies + + - ``Kokkos::SpaceAccessibility::accessible`` + +Example +======= + +TBD + diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index e977126df5..90d578951e 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -57,4 +57,5 @@ gen_lapack_eti(Lapack_gesv gesv) gen_lapack_eti(Lapack_trtri trtri) gen_lapack_eti(Lapack_svd svd) gen_lapack_eti(Lapack_geqrf geqrf) +gen_lapack_eti(Lapack_gemqr gemqr) gen_lapack_eti(Lapack_potrf potrf) diff --git a/lapack/eti/generated_specializations_cpp/gemqr/KokkosLapack_gemqr_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/gemqr/KokkosLapack_gemqr_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..e5f222bdc6 --- /dev/null +++ b/lapack/eti/generated_specializations_cpp/gemqr/KokkosLapack_gemqr_eti_spec_inst.cpp.in @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosLapack_gemqr_spec.hpp" + +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEMQR_ETI_INST_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack diff --git a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in index 4f4ad91cb6..0bcc17dca8 100644 --- a/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in +++ b/lapack/eti/generated_specializations_cpp/geqrf/KokkosLapack_geqrf_eti_spec_inst.cpp.in @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true #include "KokkosKernels_config.h" diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..730a5d54da --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_avail.hpp.in @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_GEMQR_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEMQR_ETI_SPEC_AVAIL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEMQR_ETI_AVAIL_BLOCK@ + } // namespace Impl +} // namespace KokkosLapack +#endif diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_decl.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..60c404f84b --- /dev/null +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_gemqr_eti_spec_decl.hpp.in @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_GEMQR_ETI_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEMQR_ETI_SPEC_DECL_HPP_ +namespace KokkosLapack { +namespace Impl { +@LAPACK_GEMQR_ETI_DECL_BLOCK@ + } // IMPL +} // namespace KokkosLapack +#endif diff --git a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in index 899a8b7604..729700d470 100644 --- a/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in +++ b/lapack/eti/generated_specializations_hpp/KokkosLapack_geqrf_eti_spec_avail.hpp.in @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ #define KOKKOSLAPACK_GEQRF_ETI_SPEC_AVAIL_HPP_ diff --git a/lapack/impl/KokkosLapack_gemqr_impl.hpp b/lapack/impl/KokkosLapack_gemqr_impl.hpp new file mode 100644 index 0000000000..734da67aae --- /dev/null +++ b/lapack/impl/KokkosLapack_gemqr_impl.hpp @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_IMPL_GEMQR_HPP_ +#define KOKKOSLAPACK_IMPL_GEMQR_HPP_ + +/// \file KokkosLapack_gemqr_impl.hpp +/// \brief Implementation(s) of QR multiplication by Q. + +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// NOTE: Might add the implementation of KokkosLapack::gemqr later + +} // namespace Impl +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_IMPL_GEMQR_HPP diff --git a/lapack/impl/KokkosLapack_gemqr_spec.hpp b/lapack/impl/KokkosLapack_gemqr_spec.hpp new file mode 100644 index 0000000000..26a91f7378 --- /dev/null +++ b/lapack/impl/KokkosLapack_gemqr_spec.hpp @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_IMPL_GEMQR_SPEC_HPP_ +#define KOKKOSLAPACK_IMPL_GEMQR_SPEC_HPP_ + +#include +#include +#include + +// Include the actual functors +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include +#endif + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct gemqr_eti_spec_avail { + enum : bool { value = false }; +}; +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization availability +// KokkosLapack::Impl::GEMQR. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _INST macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEMQR_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct gemqr_eti_spec_avail< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +// Include the actual specialization declarations +#include +#include + +namespace KokkosLapack { +namespace Impl { + +// Unification layer +template ::value, + bool eti_spec_avail = gemqr_eti_spec_avail::value> +struct GEMQR { + static void gemqr(const ExecutionSpace &space, const char side[], const char trans[], const AMatrix &A, + const TauArray &Tau, const CMatrix &C, const InfoArray &info); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// Unification layer +template +struct GEMQR { + static void gemqr(const ExecutionSpace & /* space */, const char[] /*side*/, const char[] /*trans*/, + const AMatrix & /* A */, const TauArray & /* Tau */, const CMatrix & /* C */, + const InfoArray & /* Info */) { + // NOTE: Might add the implementation of KokkosLapack::gemqr later + throw std::runtime_error( + "No fallback implementation of GEMQR (apply Q from QR factorization) " + "exists. Enable LAPACK, CUSOLVER, ROCSOLVER or MAGMA TPL."); + } +}; + +#endif +} // namespace Impl +} // namespace KokkosLapack + +// +// Macro for declaration of full specialization of +// KokkosLapack::Impl::GEMQR. This is NOT for users!!! All +// the declarations of full specializations go in this header file. +// We may spread out definitions (see _DEF macro below) across one or +// more .cpp files. +// +#define KOKKOSLAPACK_GEMQR_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEMQR< \ + EXEC_SPACE_TYPE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#define KOKKOSLAPACK_GEMQR_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEMQR, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + false, true>; + +#include + +#endif // KOKKOSLAPACK_IMPL_GEMQR_SPEC_HPP_ diff --git a/lapack/impl/KokkosLapack_geqrf_impl.hpp b/lapack/impl/KokkosLapack_geqrf_impl.hpp index a55161f284..865a9d4c50 100644 --- a/lapack/impl/KokkosLapack_geqrf_impl.hpp +++ b/lapack/impl/KokkosLapack_geqrf_impl.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOSLAPACK_IMPL_GEQRF_HPP_ #define KOKKOSLAPACK_IMPL_GEQRF_HPP_ diff --git a/lapack/impl/KokkosLapack_geqrf_spec.hpp b/lapack/impl/KokkosLapack_geqrf_spec.hpp index d8fce5e81d..b538dce66e 100644 --- a/lapack/impl/KokkosLapack_geqrf_spec.hpp +++ b/lapack/impl/KokkosLapack_geqrf_spec.hpp @@ -1,18 +1,6 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + #ifndef KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ #define KOKKOSLAPACK_IMPL_GEQRF_SPEC_HPP_ diff --git a/lapack/src/KokkosLapack_gemqr.hpp b/lapack/src/KokkosLapack_gemqr.hpp new file mode 100644 index 0000000000..7e312a73f1 --- /dev/null +++ b/lapack/src/KokkosLapack_gemqr.hpp @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +/// \file KokkosLapack_gemqr.hpp +/// \brief QR multiply by Q factor +/// +/// This file provides KokkosLapack::gemqr. This function performs a +/// local (no MPI) multiplication of Q, computed by geqrf, and a matrix. + +#ifndef KOKKOSLAPACK_GEMQR_HPP_ +#define KOKKOSLAPACK_GEMQR_HPP_ + +#include + +#include "KokkosLapack_gemqr_spec.hpp" +#include "KokkosKernels_Error.hpp" + +namespace KokkosLapack { + +/// \brief Multiplies matrix C with the Q factor, from a QR decomposition +/// +/// \tparam ExecutionSpace The space where the kernel will run. +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam CMatrix Type of matrix C, as a 2-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param space [in] Execution space instance used to specified how to execute +/// the gemqr kernels. +/// \param side [in] The side of C to be used to multiply by Q +/// \param trans [in] Operation applied to Q for the multiplcation: none, transpose +/// or hermitian +/// \param A [in] The i-th column must contain the vector which defines the +/// elementary reflector H(i), for i = 1,2,...,k, as returned by +/// GEQRF in the first k columns of its array argument A. +/// \param Tau [in] One-dimensional array of size k. TAU(i) must contain the scalar +/// factor of the elementary reflector H(i), as returned by GEQRF. +/// \param C [in,out] On entry, the M-by-N matrix C. +/// On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successful exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +/// +template +void gemqr(const ExecutionSpace& space, const char side[], const char trans[], const AMatrix& A, const TauArray& Tau, + const CMatrix& C, const InfoArray& Info) { + // NOTE: Currently, KokkosLapack::gemqr only supports LAPACK, cuSOLVER and + // rocSOLVER TPLs. + + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + static_assert(Kokkos::SpaceAccessibility::accessible); + + static_assert(Kokkos::is_view::value, "KokkosLapack::gemqr: A must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gemqr: Tau must be Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gemqr: C must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "KokkosLapack::gemqr: Info must be Kokkos::View."); + + static_assert(static_cast(AMatrix::rank) == 2, "KokkosLapack::gemqr: A must have rank 2."); + static_assert(static_cast(TauArray::rank) == 1, "KokkosLapack::gemqr: Tau must have rank 1."); + static_assert(static_cast(CMatrix::rank) == 2, "KokkosLapack::gemqr: C must have rank 2."); + static_assert(static_cast(InfoArray::rank) == 1, "KokkosLapack::gemqr: Info must have rank 1."); + + static_assert(std::is_same_v, + "KokkosLapack::gemqr: Info must be an array of integers."); + + if (side == nullptr || (side[0] != 'L' && side[0] != 'l' && side[0] != 'R' && side[0] != 'r')) { + std::ostringstream os; + os << "KokkosLapack::gemrf: side must be \"L\", \"l\", \"R\" or \"r\""; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if (trans == nullptr || (trans[0] != 'N' && trans[0] != 'n' && trans[0] != 'T' && trans[0] != 't' && + trans[0] != 'C' && trans[0] != 'c')) { + std::ostringstream os; + os << "KokkosLapack::gemrf: trans must be \"N\", \"n\", \"T\", \"t\", \"C\" or \"c\""; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + const int64_t m = A.extent(0); + const int64_t n = A.extent(1); + const int64_t tau0 = Tau.extent(0); + const int64_t info0 = Info.extent(0); + + // Check validity of dimensions + if (tau0 != std::min(m, n)) { + std::ostringstream os; + os << "KokkosLapack::geqrf: length of Tau must be equal to min(m,n): " + << " A: " << m << " x " << n << ", Tau length = " << tau0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if (info0 < 1) { + std::ostringstream os; + os << "KokkosLapack::gemqr: length of Info must be at least 1, Info length = " << info0; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if ((side[0] == 'L' || side[0] == 'l') && C.extent_int(0) != m) { + std::ostringstream os; + os << "KokkosLapack::gemqr: multiplying on the left but A.extent(0) != C.extent(0)"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + if ((side[0] == 'R' || side[0] == 'r') && C.extent_int(0) != n) { + std::ostringstream os; + os << "KokkosLapack::gemqr: multiplying on the right but A.extent(1) != C.extent(0)"; + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + + using AMatrix_Internal = Kokkos::View>; + using TauArray_Internal = Kokkos::View>; + using CMatrix_Internal = Kokkos::View>; + using InfoArray_Internal = Kokkos::View>; + + AMatrix_Internal A_i = A; + TauArray_Internal Tau_i = Tau; + CMatrix_Internal C_i = C; + InfoArray_Internal Info_i = Info; + + KokkosLapack::Impl::GEMQR::gemqr(space, side, trans, A_i, Tau_i, C_i, Info_i); +} + +/// \brief Multiplies matrix C with the Q factor, from a QR decomposition +/// +/// \tparam AMatrix Type of matrix A, as a 2-D Kokkos::View. +/// \tparam TauArray Type of array Tau, as a 1-D Kokkos::View. +/// \tparam CMatrix Type of matrix C, as a 2-D Kokkos::View. +/// \tparam InfoArray Type of array Info, as a 1-D Kokkos::View. +/// +/// \param side [in] The side of C to be used to multiply by Q +/// \param trans [in] Operation applied to Q for the multiplication: none, transpose +/// or hermitian +/// \param A [in] The i-th column must contain the vector which defines the +/// elementary reflector H(i), for i = 1,2,...,k, as returned by +/// GEQRF in the first k columns of its array argument A. +/// \param Tau [in] One-dimensional array of size k. TAU(i) must contain the scalar +/// factor of the elementary reflector H(i), as returned by GEQRF. +/// \param C [in,out] On entry, the M-by-N matrix C. +/// On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. +/// \param Info [out] One-dimensional array of integers and of size 1: +/// Info[0] = 0: successful exit +/// Info[0] < 0: if equal to '-i', the i-th argument had an +/// illegal value +template +void gemqr(const char side[], const char trans[], const AMatrix& A, const TauArray& Tau, const CMatrix& C, + const InfoArray& Info) { + typename AMatrix::execution_space space{}; + gemqr(space, side, trans, A, Tau, C, Info); +} + +} // namespace KokkosLapack + +#endif // KOKKOSLAPACK_GEMQR_HPP_ diff --git a/lapack/src/KokkosLapack_geqrf.hpp b/lapack/src/KokkosLapack_geqrf.hpp index ba3c680ac1..17139efe95 100644 --- a/lapack/src/KokkosLapack_geqrf.hpp +++ b/lapack/src/KokkosLapack_geqrf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project /// \file KokkosLapack_geqrf.hpp /// \brief QR factorization diff --git a/lapack/tpls/KokkosLapack_Host_tpl.cpp b/lapack/tpls/KokkosLapack_Host_tpl.cpp index fd62d42812..f1761773ee 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.cpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.cpp @@ -67,6 +67,20 @@ void F77_BLAS_MANGLE(cgeqrf, CGEQRF)(const int*, const int*, std::complex void F77_BLAS_MANGLE(zgeqrf, ZGEQRF)(const int*, const int*, std::complex*, const int*, std::complex*, std::complex*, int*, int*); +/// +/// {Un,Or}mqr +/// +void F77_BLAS_MANGLE(sormqr, SORMQR)(const char*, const char*, const int*, const int*, const int*, float*, const int*, + float*, float*, const int*, float*, int*, int*); +void F77_BLAS_MANGLE(dormqr, DORMQR)(const char*, const char*, const int*, const int*, const int*, double*, const int*, + double*, double*, const int*, double*, int*, int*); +void F77_BLAS_MANGLE(cunmqr, CUNMQR)(const char*, const char*, const int*, const int*, const int*, std::complex*, + const int*, std::complex*, std::complex*, const int*, + std::complex*, int*, int*); +void F77_BLAS_MANGLE(zunmqr, ZUNMQR)(const char*, const char*, const int*, const int*, const int*, + std::complex*, const int*, std::complex*, std::complex*, + const int*, std::complex*, int*, int*); + /// /// Potrf /// @@ -97,6 +111,11 @@ void F77_BLAS_MANGLE(zpotrf, ZPOTRF)(const char*, const int*, std::complex::potrf(const char uplo, const int n, float* a, const int l return info; } +template <> +void HostLapack::gemqr(const char side, const char trans, const int m, const int n, const int k, float* a, + const int lda, float* tau, float* c, const int ldc, float* work, int lwork, int* info) { +#if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) + sormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#else + F77_FUNC_SORMQR(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#endif +} + /// /// double /// @@ -212,6 +241,17 @@ int HostLapack::potrf(const char uplo, const int n, double* a, const int return info; } +template <> +void HostLapack::gemqr(const char side, const char trans, const int m, const int n, const int k, double* a, + const int lda, double* tau, double* c, const int ldc, double* work, int lwork, + int* info) { +#if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) + dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#else + F77_FUNC_DORMQR(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#endif +} + /// /// std::complex /// @@ -257,6 +297,17 @@ void HostLapack>::geqrf(const int m, const int n, std::compl #endif } template <> +void HostLapack>::gemqr(const char side, const char trans, const int m, const int n, const int k, + std::complex* a, const int lda, std::complex* tau, + std::complex* c, const int ldc, std::complex* work, int lwork, + int* info) { +#if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) + cunmqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#else + F77_FUNC_CUNMQR(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#endif +} +template <> int HostLapack>::potrf(const char uplo, const int n, std::complex* a, const int lda) { int info = 0; #if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) @@ -313,6 +364,17 @@ void HostLapack>::geqrf(const int m, const int n, std::comp #endif } template <> +void HostLapack>::gemqr(const char side, const char trans, const int m, const int n, const int k, + std::complex* a, const int lda, std::complex* tau, + std::complex* c, const int ldc, std::complex* work, + int lwork, int* info) { +#if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) + zunmqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#else + F77_FUNC_ZUNMQR(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info); +#endif +} +template <> int HostLapack>::potrf(const char uplo, const int n, std::complex* a, const int lda) { int info = 0; #if defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) diff --git a/lapack/tpls/KokkosLapack_Host_tpl.hpp b/lapack/tpls/KokkosLapack_Host_tpl.hpp index 0078c56ca2..45f4c837d2 100644 --- a/lapack/tpls/KokkosLapack_Host_tpl.hpp +++ b/lapack/tpls/KokkosLapack_Host_tpl.hpp @@ -27,6 +27,9 @@ struct HostLapack { static void geqrf(const int m, const int n, T *a, const int lda, T *tau, T *work, int lwork, int *info); + static void gemqr(const char side, const char trans, const int m, const int n, const int k, T *a, const int lda, + T *tau, T *c, const int ldc, T *work, int lwork, int *info); + static int potrf(const char uplo, const int n, T *a, const int lda); }; } // namespace Impl diff --git a/lapack/tpls/KokkosLapack_gemqr_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_gemqr_tpl_spec_avail.hpp new file mode 100644 index 0000000000..77d12cbd80 --- /dev/null +++ b/lapack/tpls/KokkosLapack_gemqr_tpl_spec_avail.hpp @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosLapack { +namespace Impl { +// Specialization struct which defines whether a specialization exists +template +struct gemqr_tpl_spec_avail { + enum : bool { value = false }; +}; + +// Generic Host side LAPACK (could be MKL or whatever) +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) + +#define KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUT, MEMSPACE) \ + template \ + struct gemqr_tpl_spec_avail< \ + ExecSpace, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif +} // namespace Impl +} // namespace KokkosLapack + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gemqr_tpl_spec_avail< \ + Kokkos::Cuda, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include + +namespace KokkosLapack { +namespace Impl { + +#define KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_ROCSOLVER(SCALAR, LAYOUT, MEMSPACE) \ + template <> \ + struct gemqr_tpl_spec_avail< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ + }; + +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif // KOKKOSLAPACK_GEMQR_TPL_SPEC_AVAIL_HPP_ diff --git a/lapack/tpls/KokkosLapack_gemqr_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_gemqr_tpl_spec_decl.hpp new file mode 100644 index 0000000000..65b8a2150c --- /dev/null +++ b/lapack/tpls/KokkosLapack_gemqr_tpl_spec_decl.hpp @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +#ifndef KOKKOSLAPACK_GEMQR_TPL_SPEC_DECL_HPP_ +#define KOKKOSLAPACK_GEMQR_TPL_SPEC_DECL_HPP_ + +#include +#include + +namespace KokkosLapack { +namespace Impl { +template +inline void gemqr_print_specialization() { +#ifdef KOKKOSKERNELS_ENABLE_CHECK_SPECIALIZATION +#ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA + printf("KokkosLapack::gemqr<> TPL MAGMA specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(TauViewType).name(), typeid(InfoViewType).name()); +#else +#ifdef KOKKOSKERNELS_ENABLE_TPL_LAPACK + printf("KokkosLapack::gemqr<> TPL Lapack specialization for < %s , %s, %s >\n", typeid(AViewType).name(), + typeid(TauViewType).name(), typeid(InfoViewType).name()); +#endif +#endif +#endif +} +} // namespace Impl +} // namespace KokkosLapack + +// Generic Host side LAPACK (could be MKL or whatever) +#if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE) +#include + +namespace KokkosLapack { +namespace Impl { + +template +void lapackGemqrWrapper(const char side[], const char trans[], const AViewType& A, const TauViewType& Tau, + const CViewType& C, const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - gemqr: A needs to have a Kokkos::LayoutLeft"); + const int m = C.extent_int(0); + const int n = C.extent_int(1); + const int k = Tau.extent_int(0); + const int lda = A.stride(1); + const int ldc = C.stride(1); + + int lwork = -1; + // work needs to be at least length 1 to store the returned value for lwork + Kokkos::View work("work array", 1); + + if constexpr (KokkosKernels::ArithTraits::is_complex) { + using MagType = typename KokkosKernels::ArithTraits::mag_type; + + HostLapack>::gemqr( + side[0], trans[0], m, n, k, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), reinterpret_cast*>(C.data()), ldc, + reinterpret_cast*>(work.data()), lwork, Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0).real()); + + work = Kokkos::View("gemqr work buffer", lwork); + + HostLapack>::gemqr( + side[0], trans[0], m, n, k, reinterpret_cast*>(A.data()), lda, + reinterpret_cast*>(Tau.data()), reinterpret_cast*>(C.data()), ldc, + reinterpret_cast*>(work.data()), lwork, Info.data()); + } else { + HostLapack::gemqr(side[0], trans[0], m, n, k, A.data(), lda, Tau.data(), C.data(), ldc, work.data(), lwork, + Info.data()); + + if (Info[0] < 0) return; + + lwork = static_cast(work(0)); + + work = Kokkos::View("gemqr work buffer", lwork); + + HostLapack::gemqr(side[0], trans[0], m, n, k, A.data(), lda, Tau.data(), C.data(), ldc, work.data(), lwork, + Info.data()); + } +} + +#define KOKKOSLAPACK_GEMQR_LAPACK(SCALAR, LAYOUT, EXECSPACE, MEM_SPACE) \ + template <> \ + struct GEMQR< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, true, \ + gemqr_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using TauViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using CViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void gemqr(const EXECSPACE& /* space */, const char side[], const char trans[], const AViewType& A, \ + const TauViewType& Tau, const CViewType& C, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gemqr[TPL_LAPACK," #SCALAR "]"); \ + gemqr_print_specialization(); \ + lapackGemqrWrapper(side, trans, A, Tau, C, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#if defined(KOKKOS_ENABLE_SERIAL) +KOKKOSLAPACK_GEMQR_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Serial, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_OPENMP) +KOKKOSLAPACK_GEMQR_LAPACK(float, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(double, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::OpenMP, Kokkos::HostSpace) +#endif + +#if defined(KOKKOS_ENABLE_THREADS) +KOKKOSLAPACK_GEMQR_LAPACK(float, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(double, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +KOKKOSLAPACK_GEMQR_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::Threads, Kokkos::HostSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_LAPACK + +// CUSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include "KokkosLapack_cusolver.hpp" + +namespace KokkosLapack { +namespace Impl { + +template +void cusolverGemqrWrapper(const ExecutionSpace& space, const char side[], const char trans[], const AViewType& A, + const TauViewType& Tau, const CViewType& C, const InfoViewType& Info) { + using memory_space = typename AViewType::memory_space; + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - cusolver {or,un}mqr: A needs to have a Kokkos::LayoutLeft"); + const int m = C.extent_int(0); + const int n = C.extent_int(1); + const int k = Tau.extent_int(0); + const int lda = A.stride(1); + const int ldc = C.stride(1); + int lwork = 0; + + const cublasSideMode_t cu_side = KokkosBlas::Impl::side_mode_kk_to_cublas(side); + const cublasOperation_t cu_trans = KokkosBlas::Impl::trans_mode_kk_to_cublas(trans); + + CudaLapackSingleton& s = CudaLapackSingleton::singleton(); + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSetStream(s.handle, space.cuda_stream())); + if constexpr (std::is_same_v) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSormqr_bufferSize(s.handle, cu_side, cu_trans, m, n, k, A.data(), + lda, Tau.data(), C.data(), ldc, &lwork)); + Kokkos::View Workspace("cusolver sormqr workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSormqr(s.handle, cu_side, cu_trans, m, n, k, A.data(), lda, + Tau.data(), C.data(), ldc, Workspace.data(), lwork, + Info.data())); + } + if constexpr (std::is_same_v) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnDormqr_bufferSize(s.handle, cu_side, cu_trans, m, n, k, A.data(), + lda, Tau.data(), C.data(), ldc, &lwork)); + Kokkos::View Workspace("cusolver dormqr workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnDormqr(s.handle, cu_side, cu_trans, m, n, k, A.data(), lda, + Tau.data(), C.data(), ldc, Workspace.data(), lwork, + Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnCunmqr_bufferSize( + s.handle, cu_side, cu_trans, m, n, k, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), reinterpret_cast(C.data()), ldc, &lwork)); + Kokkos::View Workspace("cusolver cunmqr workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnCunmqr(s.handle, cu_side, cu_trans, m, n, k, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), reinterpret_cast(C.data()), ldc, + reinterpret_cast(Workspace.data()), lwork, Info.data())); + } + if constexpr (std::is_same_v>) { + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnZunmqr_bufferSize( + s.handle, cu_side, cu_trans, m, n, k, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), reinterpret_cast(C.data()), ldc, &lwork)); + Kokkos::View Workspace("cusolver zunmqr workspace", lwork); + + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL( + cusolverDnZunmqr(s.handle, cu_side, cu_trans, m, n, k, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), reinterpret_cast(C.data()), + ldc, reinterpret_cast(Workspace.data()), lwork, Info.data())); + } + KOKKOSLAPACK_IMPL_CUSOLVER_SAFE_CALL(cusolverDnSetStream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEMQR_CUSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEMQR< \ + Kokkos::Cuda, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + gemqr_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using CViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void gemqr(const Kokkos::Cuda& space, const char side[], const char trans[], const AViewType& A, \ + const TauViewType& Tau, const CViewType& C, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gemqr[TPL_CUSOLVER," #SCALAR "]"); \ + gemqr_print_specialization(); \ + \ + cusolverGemqrWrapper(space, side, trans, A, Tau, C, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEMQR_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#if defined(KOKKOSKERNELS_INST_MEMSPACE_CUDAUVMSPACE) +KOKKOSLAPACK_GEMQR_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSLAPACK_GEMQR_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +#endif + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + +// ROCSOLVER +#ifdef KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER +#include +#include + +namespace KokkosLapack { +namespace Impl { + +template +void rocsolverGemqrWrapper(const ExecutionSpace& space, const char side[], const char trans[], const AViewType& A, + const TauViewType& Tau, const CViewType& C, const InfoViewType& Info) { + using Scalar = typename AViewType::non_const_value_type; + + using ALayout_t = typename AViewType::array_layout; + static_assert(std::is_same_v, + "KokkosLapack - rocsolver {un,or}mqr: A needs to have a Kokkos::LayoutLeft"); + const rocblas_int m = static_cast(C.extent(0)); + const rocblas_int n = static_cast(C.extent(1)); + const rocblas_int k = static_cast(Tau.extent(0)); + const rocblas_int lda = static_cast(A.stride(1)); + const rocblas_int ldc = static_cast(C.stride(1)); + + rocblas_side roc_side = KokkosBlas::Impl::side_mode_kk_to_rocblas(side); + rocblas_operation roc_trans = KokkosBlas::Impl::trans_mode_kk_to_rocblas(trans); + + KokkosBlas::Impl::RocBlasSingleton& s = KokkosBlas::Impl::RocBlasSingleton::singleton(); + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocblas_set_stream(s.handle, space.hip_stream())); + if constexpr (std::is_same_v) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL( + rocsolver_sormqr(s.handle, roc_side, roc_trans, m, n, k, A.data(), lda, Tau.data(), C.data(), ldc)); + } + if constexpr (std::is_same_v) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL( + rocsolver_dormqr(s.handle, roc_side, roc_trans, m, n, k, A.data(), lda, Tau.data(), C.data(), ldc)); + } + if constexpr (std::is_same_v>) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_cunmqr( + s.handle, roc_side, roc_trans, m, n, k, reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), reinterpret_cast(C.data()), ldc)); + } + if constexpr (std::is_same_v>) { + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocsolver_zunmqr(s.handle, roc_side, roc_trans, m, n, k, + reinterpret_cast(A.data()), lda, + reinterpret_cast(Tau.data()), + reinterpret_cast(C.data()), ldc)); + } + Kokkos::deep_copy(Info, 0); // Success + KOKKOSBLAS_IMPL_ROCBLAS_SAFE_CALL(rocblas_set_stream(s.handle, NULL)); +} + +#define KOKKOSLAPACK_GEMQR_ROCSOLVER(SCALAR, LAYOUT, MEM_SPACE) \ + template <> \ + struct GEMQR< \ + Kokkos::HIP, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + true, \ + gemqr_eti_spec_avail, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>>::value> { \ + using AViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using TauViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using CViewType = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using InfoViewType = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + \ + static void gemqr(const Kokkos::HIP& space, const char side[], const char trans[], const AViewType& A, \ + const TauViewType& Tau, const CViewType& C, const InfoViewType& Info) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::gemqr[TPL_ROCSOLVER," #SCALAR "]"); \ + gemqr_print_specialization(); \ + \ + rocsolverGemqrWrapper(space, side, trans, A, Tau, C, Info); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSLAPACK_GEMQR_ROCSOLVER(float, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_ROCSOLVER(double, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSLAPACK_GEMQR_ROCSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::HIPSpace) + +} // namespace Impl +} // namespace KokkosLapack +#endif // KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER + +#endif diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp index 131f35da95..b3fb8359b4 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_avail.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ #define KOKKOSLAPACK_GEQRF_TPL_SPEC_AVAIL_HPP_ diff --git a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp index ee7e3cc8ec..500a7289c4 100644 --- a/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp +++ b/lapack/tpls/KokkosLapack_geqrf_tpl_spec_decl.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project #ifndef KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ #define KOKKOSLAPACK_GEQRF_TPL_SPEC_DECL_HPP_ diff --git a/lapack/unit_test/Test_Lapack.hpp b/lapack/unit_test/Test_Lapack.hpp index 7c7bf55559..584b47137b 100644 --- a/lapack/unit_test/Test_Lapack.hpp +++ b/lapack/unit_test/Test_Lapack.hpp @@ -7,6 +7,7 @@ #include "Test_Lapack_trtri.hpp" #include "Test_Lapack_svd.hpp" #include "Test_Lapack_geqrf.hpp" +#include "Test_Lapack_gemqr.hpp" #include "Test_Lapack_potrf.hpp" #endif // TEST_LAPACK_HPP diff --git a/lapack/unit_test/Test_Lapack_gemqr.hpp b/lapack/unit_test/Test_Lapack_gemqr.hpp new file mode 100644 index 0000000000..375cf29241 --- /dev/null +++ b/lapack/unit_test/Test_Lapack_gemqr.hpp @@ -0,0 +1,416 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project + +// Only enable this test where KokkosLapack supports geqrf: +// CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK +#if (defined(TEST_CUDA_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER)) || \ + (defined(TEST_HIP_LAPACK_CPP) && defined(KOKKOSKERNELS_ENABLE_TPL_ROCSOLVER)) || \ + ((defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_ACCELERATE)) && \ + (defined(TEST_OPENMP_LAPACK_CPP) || defined(TEST_SERIAL_LAPACK_CPP) || defined(TEST_THREADS_LAPACK_CPP))) + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace Test { + +template +void impl_test_gemqr(int m, int n) { + using ALayout_t = typename ViewTypeA::array_layout; + using ViewTypeInfo = Kokkos::View; + using execution_space = typename Device::execution_space; + using ScalarA = typename ViewTypeA::value_type; + using ats = KokkosKernels::ArithTraits; + + Kokkos::Random_XorShift64_Pool rand_pool(13718); + + const int minMN = std::min(m, n); + + // ******************************************************************** + // Create device views + // ******************************************************************** + ViewTypeA A("A", m, n); + ViewTypeA Aorig("Aorig", m, n); + ViewTypeTau Tau("Tau", minMN); + ViewTypeInfo Info("Info", 1); + + // ******************************************************************** + // Create host mirrors of device views + // ******************************************************************** + typename ViewTypeA::host_mirror_type h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::host_mirror_type h_Aorig = Kokkos::create_mirror_view(Aorig); + typename ViewTypeTau::host_mirror_type h_tau = Kokkos::create_mirror_view(Tau); + typename ViewTypeInfo::host_mirror_type h_info = Kokkos::create_mirror_view(Info); + + // ******************************************************************** + // Initialize data + // ******************************************************************** + if ((m == 3) && (n == 3)) { + h_A(0, 0) = ScalarA(12.); + h_A(0, 1) = ScalarA(-51.); + h_A(0, 2) = ScalarA(4.); + + h_A(1, 0) = ScalarA(6.); + h_A(1, 1) = ScalarA(167.); + h_A(1, 2) = ScalarA(-68.); + + h_A(2, 0) = ScalarA(-4.); + h_A(2, 1) = ScalarA(24.); + h_A(2, 2) = ScalarA(-41.); + + Kokkos::deep_copy(A, h_A); + } else { + Kokkos::fill_random(A, rand_pool, Kokkos::rand, ScalarA>::max()); + Kokkos::deep_copy(h_A, A); + } + + Kokkos::deep_copy(h_Aorig, h_A); + Kokkos::fence(); + + // ******************************************************************** + // Perform the QR factorization + // ******************************************************************** + execution_space space{}; + try { + KokkosLapack::geqrf(space, A, Tau, Info); + } catch (const std::runtime_error& e) { + std::cout << "KokkosLapack::geqrf(): caught exception '" << e.what() << "'" << std::endl; + FAIL(); + return; + } + + Kokkos::fence(); + + Kokkos::deep_copy(h_info, Info); + EXPECT_EQ(h_info[0], 0) << "Failed geqrf() test: Info[0] = " << h_info[0]; + + // ******************************************************************** + // Get the results + // ******************************************************************** + Kokkos::deep_copy(h_A, A); + Kokkos::deep_copy(h_tau, Tau); + + typename KokkosKernels::ArithTraits::mag_type absTol(1.e-8); + if constexpr (std::is_same_v::mag_type, + float>) { + absTol = 5.e-5; + } + + // ******************************************************************** + // Check outputs h_A and h_tau + // ******************************************************************** + if ((m == 3) && (n == 3)) { + Kokkos::View refMatrix("ref matrix", m, n); + Kokkos::View refTau("ref tau", m); + + refMatrix(0, 0) = ScalarA(-14.); + refMatrix(0, 1) = ScalarA(-21.); + refMatrix(0, 2) = ScalarA(14.); + + refMatrix(1, 0) = ScalarA(0.2307692307692308); + refMatrix(1, 1) = ScalarA(-175.); + refMatrix(1, 2) = ScalarA(70.); + + refMatrix(2, 0) = ScalarA(-0.1538461538461539); + refMatrix(2, 1) = ScalarA(1. / 18.); + refMatrix(2, 2) = ScalarA(-35.); + + refTau(0) = ScalarA(1.857142857142857); + refTau(1) = ScalarA(1.993846153846154); + refTau(2) = ScalarA(0.); + + { + bool test_flag_A = true; + for (int i = 0; (i < m) && test_flag_A; ++i) { + for (int j = 0; (j < n) && test_flag_A; ++j) { + if (ats::abs(h_A(i, j) - refMatrix(i, j)) > absTol) { + std::cout << "h_Aoutput checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Aoutput(i,j) = " << std::setprecision(16) << h_A(i, j) + << ", refMatrix(i,j) = " << std::setprecision(16) << refMatrix(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_A(i, j) - refMatrix(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_A = false; + } + } + } + ASSERT_EQ(test_flag_A, true); + } + + { + bool test_flag_tau = true; + for (int i = 0; (i < m) && test_flag_tau; ++i) { + if (ats::abs(h_tau(i) - refTau(i)) > absTol) { + std::cout << "tau checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", h_tau(i,j) = " << std::setprecision(16) + << h_tau(i) << ", refTau(i,j) = " << std::setprecision(16) << refTau(i) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_tau(i) - refTau(i)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_tau = false; + } + } + ASSERT_EQ(test_flag_tau, true); + } + } + + // ******************************************************************** + // Compute Q, R, and QR + // ******************************************************************** + ViewTypeA Q("Q", m, m); + ViewTypeA R("R", m, n); + ViewTypeA QR("QR", m, n); + + typename ViewTypeA::host_mirror_type h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::host_mirror_type h_R = Kokkos::create_mirror_view(R); + typename ViewTypeA::host_mirror_type h_QR = Kokkos::create_mirror_view(QR); + + // Load identity matrix in Q + for (int idx = 0; idx < m; ++idx) { + h_Q(idx, idx) = 1.0; + } + Kokkos::deep_copy(Q, h_Q); + + // Load R from A + for (int rowIdx = 0; rowIdx < minMN; ++rowIdx) { + for (int colIdx = 0; colIdx < n; ++colIdx) { + if (rowIdx <= colIdx) { + h_R(rowIdx, colIdx) = h_A(rowIdx, colIdx); + } + } + } + Kokkos::deep_copy(R, h_R); + + // Apply Q stored in A to our Q that is currently set as the identity + KokkosLapack::gemqr(space, "L", "N", A, Tau, Q, Info); + Kokkos::deep_copy(h_Q, Q); + + // Recompute A from Q and R factors + KokkosBlas::gemm("N", "N", 1., Q, R, 0., QR); + Kokkos::deep_copy(h_QR, QR); + + // ******************************************************************** + // Check Q, R, and QR + // ******************************************************************** + if ((m == 3) && (n == 3)) { + Kokkos::View refQ("ref Q", m, n); + Kokkos::View refR("ref R", m, n); + + refQ(0, 0) = ScalarA(-6. / 7.); + refQ(0, 1) = ScalarA(69. / 175.); + refQ(0, 2) = ScalarA(58. / 175.); + + refQ(1, 0) = ScalarA(-3. / 7.); + refQ(1, 1) = ScalarA(-158. / 175.); + refQ(1, 2) = ScalarA(-6. / 175.); + + refQ(2, 0) = ScalarA(2. / 7.); + refQ(2, 1) = ScalarA(-6. / 35.); + refQ(2, 2) = ScalarA(33. / 35.); + + refR(0, 0) = ScalarA(-14.); + refR(0, 1) = ScalarA(-21.); + refR(0, 2) = ScalarA(14.); + + refR(1, 1) = ScalarA(-175.); + refR(1, 2) = ScalarA(70.); + + refR(2, 2) = ScalarA(-35.); + + { + bool test_flag_Q = true; + for (int i = 0; (i < m) && test_flag_Q; ++i) { + for (int j = 0; (j < m) && test_flag_Q; ++j) { + if (ats::abs(h_Q(i, j) - refQ(i, j)) > absTol) { + std::cout << "Q checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Q(i,j) = " << std::setprecision(16) << h_Q(i, j) + << ", refQ(i,j) = " << std::setprecision(16) << refQ(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_Q(i, j) - refQ(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_Q = false; + } + } + } + ASSERT_EQ(test_flag_Q, true); + } + + { + bool test_flag_R = true; + for (int i = 0; (i < m) && test_flag_R; ++i) { + for (int j = 0; (j < n) && test_flag_R; ++j) { + if (ats::abs(h_R(i, j) - refR(i, j)) > absTol) { + std::cout << "R checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_R(i,j) = " << std::setprecision(16) << h_R(i, j) + << ", refR(i,j) = " << std::setprecision(16) << refR(i, j) + << ", |diff| = " << std::setprecision(16) << ats::abs(h_R(i, j) - refR(i, j)) + << ", absTol = " << std::setprecision(16) << absTol << std::endl; + test_flag_R = false; + } + } + } + ASSERT_EQ(test_flag_R, true); + } + } + + // ******************************************************************** + // Check that A = QR + // ******************************************************************** + { + bool test_flag_QR = true; + for (int i = 0; (i < m) && test_flag_QR; ++i) { + for (int j = 0; (j < n) && test_flag_QR; ++j) { + if ((ats::abs(h_QR(i, j) - h_Aorig(i, j)) > absTol)) { + std::cout << "QR checking" + << ", m = " << m << ", n = " << n << ", i = " << i << ", j = " << j + << ", h_Aorig(i,j) = " << std::setprecision(16) << h_Aorig(i, j) + << ", h_QR(i,j) = " << std::setprecision(16) << h_QR(i, j) << ", |diff| = " << std::setprecision(16) + << ats::abs(h_QR(i, j) - h_Aorig(i, j)) << ", absTol = " << std::setprecision(16) << absTol + << std::endl; + test_flag_QR = false; + } + } + } + ASSERT_EQ(test_flag_QR, true); + } +} + +template +void applyQ_analytic() { + using ALayout_t = typename ViewTypeA::array_layout; + using ViewTypeInfo = Kokkos::View; + using execution_space = typename Device::execution_space; + using Scalar = typename ViewTypeA::value_type; + + ViewTypeA A("A", 3, 3); + ViewTypeTau Tau("tau", 3); + ViewTypeInfo Info("Info", 1); + ViewTypeA Q("Q", 3, 3); + ViewTypeA Qref("Q ref", 3, 3); + + typename ViewTypeA::host_mirror_type h_A = Kokkos::create_mirror_view(A); + typename ViewTypeA::host_mirror_type h_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::host_mirror_type h_Qref = Kokkos::create_mirror_view(Qref); + + h_A(0, 0) = 12; + h_A(0, 1) = -51; + h_A(0, 2) = 4; + h_A(1, 0) = 6; + h_A(1, 1) = 167; + h_A(1, 2) = -68; + h_A(2, 0) = -4; + h_A(2, 1) = 24; + h_A(2, 2) = -41; + Kokkos::deep_copy(A, h_A); + + // Store the identity so once Q is applied to + // this matrix we will recover the entries of Q + h_Q(0, 0) = 1; + h_Q(0, 1) = 0; + h_Q(0, 2) = 0; + h_Q(1, 0) = 0; + h_Q(1, 1) = 1; + h_Q(1, 2) = 0; + h_Q(2, 0) = 0; + h_Q(2, 1) = 0; + h_Q(2, 2) = 1; + Kokkos::deep_copy(Q, h_Q); + + h_Qref(0, 0) = -6. / 7.; + h_Qref(0, 1) = 69. / 175.; + h_Qref(0, 2) = 58. / 175.; + h_Qref(1, 0) = -3. / 7.; + h_Qref(1, 1) = -158. / 175.; + h_Qref(1, 2) = -6. / 175.; + h_Qref(2, 0) = 2. / 7.; + h_Qref(2, 1) = -6. / 35.; + h_Qref(2, 2) = 33. / 35.; + Kokkos::deep_copy(Qref, h_Qref); + + try { + execution_space space{}; + KokkosLapack::geqrf(space, A, Tau, Info); + KokkosLapack::gemqr(space, "L", "N", A, Tau, Q, Info); + } catch (const std::runtime_error& e) { + std::cout << "KokkosLapack::gemqr(): caught exception '" << e.what() << "'" << std::endl; + FAIL(); + return; + } + Kokkos::fence(); + + Kokkos::deep_copy(h_Q, Q); + for (int rowIdx = 0; rowIdx < 3; ++rowIdx) { + for (int colIdx = 0; colIdx < 3; ++colIdx) { + Test::EXPECT_NEAR_KK_REL(h_Qref(rowIdx, colIdx), h_Q(rowIdx, colIdx), + 10 * KokkosKernels::ArithTraits::eps()); + } + } +} + +} // namespace Test + +template +void test_gemqr() { +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + using view_type_a_ll = Kokkos::View; + using view_type_tau_ll = Kokkos::View; + + Test::applyQ_analytic(); + + Test::impl_test_gemqr(1, 1); + Test::impl_test_gemqr(2, 1); + Test::impl_test_gemqr(2, 2); + Test::impl_test_gemqr(3, 1); + Test::impl_test_gemqr(3, 2); + Test::impl_test_gemqr(3, 3); + + Test::impl_test_gemqr(100, 70); + Test::impl_test_gemqr(70, 100); + Test::impl_test_gemqr(100, 100); +#endif +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, gemqr_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gemqr_float"); + test_gemqr(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, gemqr_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gemqr_double"); + test_gemqr(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, gemqr_complex_float) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gemqr_complex_float"); + test_gemqr, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || \ + (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F(TestCategory, gemqr_complex_double) { + Kokkos::Profiling::pushRegion("KokkosLapack::Test::gemqr_complex_double"); + test_gemqr, TestDevice>(); + Kokkos::Profiling::popRegion(); +} +#endif + +#endif // CUDA+CUSOLVER or HIP+ROCSOLVER or LAPACK+HOST diff --git a/lapack/unit_test/Test_Lapack_geqrf.hpp b/lapack/unit_test/Test_Lapack_geqrf.hpp index 10f20ffa7a..b597bb6315 100644 --- a/lapack/unit_test/Test_Lapack_geqrf.hpp +++ b/lapack/unit_test/Test_Lapack_geqrf.hpp @@ -1,18 +1,5 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER +// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project // Only enable this test where KokkosLapack supports geqrf: // CUDA+CUSOLVER, HIP+ROCSOLVER and HOST+LAPACK @@ -33,11 +20,15 @@ namespace Test { template -void getQR(int const m, int const n, typename ViewTypeA::host_mirror_type const& h_A, - typename ViewTypeTau::host_mirror_type const& h_tau, typename ViewTypeA::host_mirror_type& h_Q, - typename ViewTypeA::host_mirror_type& h_R, typename ViewTypeA::host_mirror_type& h_QR) { +void getQR(int const m, int const n, ViewTypeA const& h_A, ViewTypeTau const& h_tau, ViewTypeA& h_Q, ViewTypeA& h_R, + ViewTypeA& h_QR) { using ScalarA = typename ViewTypeA::value_type; + using m_ViewTypeA = Kokkos::View; + using m_ViewTypeTau = Kokkos::View; + // ******************************************************************** // Populate h_R // ******************************************************************** @@ -54,8 +45,9 @@ void getQR(int const m, int const n, typename ViewTypeA::host_mirror_type const& // ******************************************************************** // Instantiate the m x m identity matrix h_I // ******************************************************************** - ViewTypeA I("I", m, m); - typename ViewTypeA::host_mirror_type h_I = Kokkos::create_mirror_view(I); + // ViewTypeA I("I", m, m); + // typename ViewTypeA::host_mirror_type h_I = Kokkos::create_mirror_view(I); + m_ViewTypeA h_I("host I", m, m); for (int i = 0; i < m; ++i) { h_I(i, i) = ScalarA(1.); } @@ -64,14 +56,11 @@ void getQR(int const m, int const n, typename ViewTypeA::host_mirror_type const& // Compute h_Q // ******************************************************************** int minMN(std::min(m, n)); - ViewTypeTau v("v", m); - typename ViewTypeTau::host_mirror_type h_v = Kokkos::create_mirror_view(v); + m_ViewTypeTau h_v("host v", m); - ViewTypeA Qk("Qk", m, m); - typename ViewTypeA::host_mirror_type h_Qk = Kokkos::create_mirror_view(Qk); + m_ViewTypeA h_Qk("host Qk", m, m); - ViewTypeA auxM("auxM", m, m); - typename ViewTypeA::host_mirror_type h_auxM = Kokkos::create_mirror_view(auxM); + m_ViewTypeA h_auxM("host auxM", m, m); // Q = H(0) H(1) . . . H(min(M,N)-1), where for k=0,1,...,min(m,n)-1: // H(k) = I - Tau(k) * v * v**H, and @@ -302,7 +291,21 @@ void impl_test_geqrf(int m, int n) { typename ViewTypeA::host_mirror_type h_R = Kokkos::create_mirror_view(R); typename ViewTypeA::host_mirror_type h_QR = Kokkos::create_mirror_view(QR); - getQR(m, n, h_A, h_tau, h_Q, h_R, h_QR); + using ViewTypeA_alias = + Kokkos::View, Kokkos::MemoryTraits>; + using ViewTypeTau_alias = + Kokkos::View, Kokkos::MemoryTraits>; + + ViewTypeA_alias ha_A(h_A.data(), h_A.extent(0), h_A.extent(1)); + ViewTypeA_alias ha_Q(h_Q.data(), h_Q.extent(0), h_Q.extent(1)); + ViewTypeA_alias ha_R(h_R.data(), h_R.extent(0), h_R.extent(1)); + ViewTypeA_alias ha_QR(h_QR.data(), h_QR.extent(0), h_QR.extent(1)); + + ViewTypeTau_alias ha_tau(h_tau.data(), h_tau.extent(0)); + + getQR(m, n, ha_A, ha_tau, ha_Q, ha_R, ha_QR); // ******************************************************************** // Check Q, R, and QR diff --git a/lapack/unit_test/Test_Lapack_svd.hpp b/lapack/unit_test/Test_Lapack_svd.hpp index bee19e5c10..46872f81f1 100644 --- a/lapack/unit_test/Test_Lapack_svd.hpp +++ b/lapack/unit_test/Test_Lapack_svd.hpp @@ -542,7 +542,7 @@ template int test_svd_wrapper() { #if defined(KOKKOSKERNELS_ENABLE_TPL_LAPACK) || defined(KOKKOSKERNELS_ENABLE_TPL_MKL) if constexpr (std::is_same_v) { - // Using a device side space with LAPACK/MKL + // Using a host side space with LAPACK/MKL return test_svd(); } #endif diff --git a/scripts/check_api_updates.py b/scripts/check_api_updates.py index 4962144eb5..fe70f1de18 100644 --- a/scripts/check_api_updates.py +++ b/scripts/check_api_updates.py @@ -56,6 +56,7 @@ ('blas/src/KokkosBlas3_trmm.hpp', ['blas3_trmm.rst']), ('blas/src/KokkosBlas3_trsm.hpp', ['blas3_trsm.rst']), ('lapack/src/KokkosLapack_geqrf.hpp', ['docs/source/API/lapack/geqrf.rst']), + ('lapack/src/KokkosLapack_gemqr.hpp', ['docs/source/API/lapack/gemqr.rst']), ('lapack/src/KokkosLapack_potrf.hpp', ['docs/source/API/lapack/potrf.rst']), ('lapack/src/KokkosLapack_gesv.hpp', ['docs/source/API/lapack/gesv.rst']), ('lapack/src/KokkosLapack_svd.hpp', ['docs/source/API/lapack/gesvd.rst']), diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index 4c48166639..d3e5c15046 100644 --- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -908,17 +908,15 @@ void spmv_bsr_rocsparse(const Kokkos::HIP& exec, Handle* handle, const char mode } \ }; -KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, - Kokkos::HIPSpace); -KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, - Kokkos::HIPSpace); +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(float, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(double, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, Kokkos::HIPSpace) +KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutLeft, Kokkos::HIPSpace) KOKKOSSPARSE_SPMV_ROCSPARSE(Kokkos::complex, rocsparse_int, rocsparse_int, Kokkos::LayoutRight, - Kokkos::HIPSpace); + Kokkos::HIPSpace) #undef KOKKOSSPARSE_SPMV_ROCSPARSE diff --git a/sparse/tpls/KokkosSparse_spmv_sellmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_sellmatrix_tpl_spec_decl.hpp index 1dfd6eb762..8d779c0320 100644 --- a/sparse/tpls/KokkosSparse_spmv_sellmatrix_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_sellmatrix_tpl_spec_decl.hpp @@ -162,9 +162,8 @@ template void spmv_sellmatrix_rocsparse(const Kokkos::HIP& exec, const char mode[], typename YVector::const_value_type& alpha, const AMatrix& A, const XVector& x, typename YVector::const_value_type& beta, const YVector& y) { - using offset_type = typename AMatrix::non_const_size_type; - using entry_type = typename AMatrix::non_const_ordinal_type; - using value_type = typename AMatrix::non_const_value_type; + using entry_type = typename AMatrix::non_const_ordinal_type; + using value_type = typename AMatrix::non_const_value_type; /* initialize rocsparse library */ rocsparse_handle rocsparseHandle = KokkosKernels::Impl::RocsparseSingleton::singleton().rocsparseHandle; @@ -175,8 +174,7 @@ void spmv_sellmatrix_rocsparse(const Kokkos::HIP& exec, const char mode[], typen rocsparse_operation myRocsparseOperation = mode_kk_to_rocsparse(mode); /* Set the index type */ - rocsparse_indextype offset_index_type = rocsparse_index_type(); - rocsparse_indextype entry_index_type = rocsparse_index_type(); + rocsparse_indextype entry_index_type = rocsparse_index_type(); /* Set the scalar type */ rocsparse_datatype compute_type = rocsparse_compute_type();