From 2ec14a6c1abf01db18edcad676a4cf0e49ffb642 Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Mon, 1 Nov 2021 13:57:05 -0500 Subject: [PATCH 1/8] Adding UNMQR and GEQRF wappers --- CHANGELOG.md | 7 + cm_generate_makefile.bash | 7 + cmake/KokkosKernels_config.h.in | 2 + cmake/Modules/FindTPLCUSOLVER.cmake | 18 + cmake/kokkoskernels_tpls.cmake | 4 + master_history.txt | 8 +- src/CMakeLists.txt | 36 + src/blas/KokkosBlas_geqrf.hpp | 173 +++ src/blas/KokkosBlas_unmqr.hpp | 257 ++++ src/blas/impl/KokkosBlas_geqrf_impl.hpp | 34 + src/blas/impl/KokkosBlas_geqrf_spec.hpp | 155 +++ src/blas/impl/KokkosBlas_unmqr_impl.hpp | 36 + src/blas/impl/KokkosBlas_unmqr_spec.hpp | 176 +++ .../KokkosBlas_geqrf_eti_spec_inst.cpp.in | 54 + ...sBlas_geqrf_workspace_eti_spec_inst.cpp.in | 54 + .../KokkosBlas_unmqr_eti_spec_inst.cpp.in | 54 + ...sBlas_unmqr_workspace_eti_spec_inst.cpp.in | 54 + .../KokkosBlas_geqrf_eti_spec_avail.hpp.in | 53 + .../KokkosBlas_geqrf_eti_spec_decl.hpp.in | 54 + ...Blas_geqrf_workspace_eti_spec_avail.hpp.in | 53 + ...sBlas_geqrf_workspace_eti_spec_decl.hpp.in | 54 + .../KokkosBlas_unmqr_eti_spec_avail.hpp.in | 53 + .../KokkosBlas_unmqr_eti_spec_decl.hpp.in | 54 + ...Blas_unmqr_workspace_eti_spec_avail.hpp.in | 53 + ...sBlas_unmqr_workspace_eti_spec_decl.hpp.in | 54 + src/impl/tpls/KokkosBlas_Cuda_tpl.hpp | 25 +- .../tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp | 164 +++ .../tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp | 770 ++++++++++++ src/impl/tpls/KokkosBlas_tpl_spec.hpp | 68 + .../tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp | 143 +++ .../tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp | 1089 +++++++++++++++++ src/impl/tpls/KokkosLapack_Host_tpl.cpp | 322 +++++ src/impl/tpls/KokkosLapack_Host_tpl.hpp | 45 + unit_test/blas/Test_Blas.hpp | 3 + unit_test/blas/Test_Blas_qr.hpp | 262 ++++ 35 files changed, 4446 insertions(+), 2 deletions(-) create mode 100644 cmake/Modules/FindTPLCUSOLVER.cmake create mode 100644 src/blas/KokkosBlas_geqrf.hpp create mode 100644 src/blas/KokkosBlas_unmqr.hpp create mode 100644 src/blas/impl/KokkosBlas_geqrf_impl.hpp create mode 100644 src/blas/impl/KokkosBlas_geqrf_spec.hpp create mode 100644 src/blas/impl/KokkosBlas_unmqr_impl.hpp create mode 100644 src/blas/impl/KokkosBlas_unmqr_spec.hpp create mode 100644 src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_eti_spec_inst.cpp.in create mode 100644 src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_workspace_eti_spec_inst.cpp.in create mode 100644 src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_eti_spec_inst.cpp.in create mode 100644 src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_workspace_eti_spec_inst.cpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_avail.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_decl.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_avail.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_decl.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_avail.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_decl.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_avail.hpp.in create mode 100644 src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_decl.hpp.in create mode 100644 src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp create mode 100644 src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp create mode 100644 src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp create mode 100644 src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp create mode 100644 src/impl/tpls/KokkosLapack_Host_tpl.cpp create mode 100644 src/impl/tpls/KokkosLapack_Host_tpl.hpp create mode 100644 unit_test/blas/Test_Blas_qr.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 7abfc7b730..76de9db0d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -199,6 +199,13 @@ - Nightly test failure: spgemm unit tests failing on White \(Power8\) [\#780](https://github.com/kokkos/kokkos-kernels/issues/780) - supernodal does not build with UVM enabled [\#633](https://github.com/kokkos/kokkos-kernels/issues/633) +## [3.1.01](https://github.com/kokkos/kokkos-kernels/tree/3.1.01) (2020-05-04) +[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.1.00...3.1.01) + +** Fixed bugs:** + +- KokkosBatched QR PR breaking nightly tests [\#691](https://github.com/kokkos/kokkos-kernels/issues/691) + ## [3.1.00](https://github.com/kokkos/kokkos-kernels/tree/3.1.00) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.0.00...3.1.00) diff --git a/cm_generate_makefile.bash b/cm_generate_makefile.bash index d633a139c8..3c5f784e5c 100755 --- a/cm_generate_makefile.bash +++ b/cm_generate_makefile.bash @@ -166,6 +166,7 @@ get_kernels_tpls_list() { KOKKOSKERNELS_USER_TPL_PATH_CMD= KOKKOSKERNELS_USER_TPL_LIBNAME_CMD= CUBLAS_DEFAULT=OFF + CUSOLVER_DEFAULT=OFF CUSPARSE_DEFAULT=OFF PARSE_TPLS_LIST=$(echo $KOKKOSKERNELS_TPLS | tr "," "\n") for TPLS_ in $PARSE_TPLS_LIST @@ -178,6 +179,9 @@ get_kernels_tpls_list() { if [ "$UC_TPLS" == "CUSPARSE" ]; then CUSPARSE_DEFAULT=ON fi + if [ "$UC_TPLS" == "CUSOLVER" ]; then + CUSOLVER_DEFAULT=ON + fi if [ "$UC_TPLS" == "BLAS" ]; then if [ "$BLAS_PATH" != "" ]; then echo User BLAS_PATH=$BLAS_PATH @@ -202,6 +206,9 @@ get_kernels_tpls_list() { if [ "$CUBLAS_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUBLAS=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi + if [ "$CUSOLVER_DEFAULT" == "OFF" ]; then + KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSOLVER=OFF ${KOKKOSKERNELS_TPLS_CMD}" + fi if [ "$CUSPARSE_DEFAULT" == "OFF" ]; then KOKKOSKERNELS_TPLS_CMD="-DKokkosKernels_ENABLE_TPL_CUSPARSE=OFF ${KOKKOSKERNELS_TPLS_CMD}" fi diff --git a/cmake/KokkosKernels_config.h.in b/cmake/KokkosKernels_config.h.in index aeb7a74efa..5e9f1e6c97 100644 --- a/cmake/KokkosKernels_config.h.in +++ b/cmake/KokkosKernels_config.h.in @@ -105,6 +105,8 @@ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSPARSE /* CUBLAS */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUBLAS +/* CUSOLVER */ +#cmakedefine KOKKOSKERNELS_ENABLE_TPL_CUSOLVER /* MAGMA */ #cmakedefine KOKKOSKERNELS_ENABLE_TPL_MAGMA /* SuperLU */ diff --git a/cmake/Modules/FindTPLCUSOLVER.cmake b/cmake/Modules/FindTPLCUSOLVER.cmake new file mode 100644 index 0000000000..f521174de4 --- /dev/null +++ b/cmake/Modules/FindTPLCUSOLVER.cmake @@ -0,0 +1,18 @@ +FIND_PACKAGE(CUDA) + +INCLUDE(FindPackageHandleStandardArgs) +IF (NOT CUDA_FOUND) + #Important note here: this find Module is named TPLCUSOLVER + #The eventual target is named CUSOLVER. To avoid naming conflicts + #the find module is called TPLCUSOLVER. This call will cause + #the find_package call to fail in a "standard" CMake way + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_FOUND) +ELSE() + #The libraries might be empty - OR they might explicitly be not found + IF("${CUDA_CUSOLVER_LIBRARIES}" MATCHES "NOTFOUND") + FIND_PACKAGE_HANDLE_STANDARD_ARGS(TPLCUSOLVER REQUIRED_VARS CUDA_cusolver_LIBRARY) + ELSE() + KOKKOSKERNELS_CREATE_IMPORTED_TPL(CUSOLVER INTERFACE + LINK_LIBRARIES "${CUDA_cusolver_LIBRARY}") + ENDIF() +ENDIF() diff --git a/cmake/kokkoskernels_tpls.cmake b/cmake/kokkoskernels_tpls.cmake index 154b11c039..4725a42348 100644 --- a/cmake/kokkoskernels_tpls.cmake +++ b/cmake/kokkoskernels_tpls.cmake @@ -460,11 +460,14 @@ SET(CUSPARSE_DEFAULT ${KOKKOS_ENABLE_CUDA}) IF(KOKKOSKERNELS_NO_DEFAULT_CUDA_TPLS) SET(CUBLAS_DEFAULT OFF) SET(CUSPARSE_DEFAULT OFF) + SET(CUSOLVER_DEFAULT OFF) ENDIF() KOKKOSKERNELS_ADD_TPL_OPTION(CUBLAS ${CUBLAS_DEFAULT} "Whether to enable CUBLAS" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_TPL_OPTION(CUSPARSE ${CUSPARSE_DEFAULT} "Whether to enable CUSPARSE" DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") +KOKKOSKERNELS_ADD_TPL_OPTION(CUSOLVER ${CUSOLVER_DEFAULT} "Whether to enable CUSOLVER" + DEFAULT_DOCSTRING "ON if CUDA-enabled Kokkos, otherwise OFF") KOKKOSKERNELS_ADD_OPTION(NO_DEFAULT_ROCM_TPLS OFF BOOL "Whether ROCM TPLs should be enabled by default. Default: OFF") # Unlike CUDA, ROCm does not automatically install these TPLs @@ -508,6 +511,7 @@ IF (NOT KOKKOSKERNELS_HAS_TRILINOS) KOKKOSKERNELS_IMPORT_TPL(MKL) KOKKOSKERNELS_IMPORT_TPL(CUBLAS) KOKKOSKERNELS_IMPORT_TPL(CUSPARSE) + KOKKOSKERNELS_IMPORT_TPL(CUSOLVER) KOKKOSKERNELS_IMPORT_TPL(CBLAS) KOKKOSKERNELS_IMPORT_TPL(LAPACKE) KOKKOSKERNELS_IMPORT_TPL(CHOLMOD) diff --git a/master_history.txt b/master_history.txt index 9ce9f32bb4..5c63ba453d 100644 --- a/master_history.txt +++ b/master_history.txt @@ -7,4 +7,10 @@ tag: 2.7.24 date: 11/05/2018 master: 1a7b524b develop: fab89e37 tag: 2.8.00 date: 02/05/2019 master: a6e05e06 develop: 6a790321 tag: 2.9.00 date: 06/24/2019 master: 4ee5f3c6 develop: 094da30c tag: 3.0.00 date: 01/31/2020 master: d86db111 release-candidate-3.0: cf24ab90 -tag: 3.1.00 date: 04/14/2020 master: f199f45d develop: 8d063eae +tag: 3.1.00 date: 04/14/2020 master: f199f45d develop: 8d063eae +tag: 3.1.01 date: 05/04/2020 master: 43773523 release: 6fce7502 +tag: 3.2.00 date: 08/19/2020 master: 07a60bcc release: ea3f2b77 +tag: 3.3.00 date: 12/16/2020 master: 42defc56 release: e5279e55 +tag: 3.3.01 date: 01/18/2021 master: f64b1c57 release: 4e1cc00b +tag: 3.4.00 date: 04/26/2021 master: fe439b21 release: d3c33910 +tag: 3.4.01 date: 05/20/2021 master: 564dccb3 release: 4c62eb86 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b9878df2aa..ffed6ac2a7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,6 +52,12 @@ IF (KOKKOSKERNELS_ENABLE_TPL_BLAS OR KOKKOSKERNELS_ENABLE_TPL_MKL) APPEND_GLOB(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/impl/tpls/KokkosBlas_Host_tpl.cpp) ENDIF() +#Include LAPACKE host wrapper +IF (KOKKOSKERNELS_ENABLE_TPL_LAPACKE) + #Do NOT add this to include path + APPEND_GLOB(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/impl/tpls/KokkosLapack_Host_tpl.cpp) +ENDIF() + include(kokkoskernels_eti.cmake) SET(ETI_HEADERS) @@ -283,6 +289,34 @@ KOKKOSKERNELS_GENERATE_ETI(Blas_trtri trtri TYPE_LISTS FLOATS LAYOUTS DEVICES ) +KOKKOSKERNELS_GENERATE_ETI(Blas_geqrf geqrf + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + +KOKKOSKERNELS_GENERATE_ETI(Blas_geqrf_workspace geqrf + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + +KOKKOSKERNELS_GENERATE_ETI(Blas_unmqr unmqr + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + +KOKKOSKERNELS_GENERATE_ETI(Blas_unmqr_workspace unmqr + COMPONENTS blas + HEADER_LIST ETI_HEADERS + SOURCE_LIST SOURCES + TYPE_LISTS FLOATS LAYOUTS DEVICES +) + KOKKOSKERNELS_GENERATE_ETI(Sparse_sptrsv_solve sptrsv_solve COMPONENTS sparse HEADER_LIST ETI_HEADERS @@ -394,6 +428,7 @@ LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) LIST(APPEND SOURCES batched/KokkosBatched_Util.cpp impl/tpls/KokkosBlas_Host_tpl.cpp + impl/tpls/KokkosLapack_Host_tpl.cpp impl/tpls/KokkosBlas_Cuda_tpl.cpp impl/tpls/KokkosBlas_Rocm_tpl.cpp impl/tpls/KokkosKernels_tpl_handles.cpp @@ -440,6 +475,7 @@ KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC SUPERLU) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CHOLMOD) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC MKL) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUBLAS) +KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSOLVER) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC CUSPARSE) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCBLAS) KOKKOSKERNELS_LINK_TPL(kokkoskernels PUBLIC ROCSPARSE) diff --git a/src/blas/KokkosBlas_geqrf.hpp b/src/blas/KokkosBlas_geqrf.hpp new file mode 100644 index 0000000000..7ab409754e --- /dev/null +++ b/src/blas/KokkosBlas_geqrf.hpp @@ -0,0 +1,173 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS_GEQRF_HPP_ +#define KOKKOSBLAS_GEQRF_HPP_ + +/// \file KokkosBlas_qeqrf.hpp + +#include "KokkosKernels_Macros.hpp" +#include "KokkosBlas_geqrf_spec.hpp" +#include "KokkosKernels_helpers.hpp" +#include +#include + +namespace KokkosBlas { + +/// \brief Compute the QR factorization of M x N matrix A. (geqrf) + +/// \tparam AViewType Input(A) / Output (Solution) M x N matrix , as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View +/// \tparam WViewType Input Workspace, as a 1-D Kokkos::View +/// +/// \param A [in, out] Input matrix, as a 2-D Kokkos::View +/// On entry, M-by-N matrix +/// On exit, overwritten with the solution. +/// \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors of +/// reflectors. \param workspace [in] Input vector, as a 1-D Kokkos::View. +/// Scratchspace for calculations. + +template +void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::geqrf: A must be a Kokkos::View"); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::geqrf: tau must be a Kokkos::View"); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::geqrf: workspace must be a Kokkos::View"); + + static_assert(static_cast(AViewType::rank) == 2, + "KokkosBlas::geqrf: A must have rank 2"); + static_assert(static_cast(TauViewType::rank) == 1, + "KokkosBlas::geqrf: Tau must have rank 1"); + static_assert(static_cast(WViewType::rank) == 1, + "KokkosBlas::geqrf: Workspace must have rank 1"); + + int64_t A0 = A.extent(0); // M + int64_t A1 = A.extent(1); // N + int64_t minmn = (A0 < A1) ? A0 : A1; + + int64_t tau0 = tau.extent(0); + int64_t lwork = workspace.extent(0); + + // Check validity of Tau + if (tau0 < minmn) { + std::ostringstream os; + os << "KokkosBlas::geqrf: Dimensions of tau and MIN(M, N) do not match " + "(require len(tau) >= min(M, N) ): " + << "min(M, N): " << minmn << "Tau: " << tau0; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + +#endif // KOKKOSKERNELS_DEBUG_LEVEL > 0 + + // return if degenerate matrix provided + if ((A.extent(0) == 0) || (A.extent(1) == 0)) return; + + // standardize particular View specializations + typedef Kokkos::View > + AVT; + + typedef Kokkos::View > + TVT; + + typedef Kokkos::View > + WVT; + + AVT A_i = A; + TVT tau_i = tau; + WVT W_i = workspace; + + typedef KokkosBlas::Impl::GEQRF impl_type; + impl_type::geqrf(A_i, tau_i, W_i); + +} // function geqrf + +template +int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { + // return if degenerate matrix provided + if ((A.extent(0) == 0) || (A.extent(1) == 0)) return 0; + + // standardize particular View specializations + typedef Kokkos::View > + AVT; + + typedef Kokkos::View > + TVT; + + AVT A_i = A; + TVT tau_i = tau; + + typedef KokkosBlas::Impl::GEQRF_WORKSPACE impl_type; + return impl_type::geqrf_workspace(A_i, tau_i); + +} // function geqrf_workspace + +template +void geqrf(AViewType& A, TauViewType& tau) { + int64_t lwork = geqrf_workspace(A, tau); + TauViewType workspace("KokkosBlas::temporary_geqrf_workspace", lwork); + geqrf(A, tau, workspace); + +} // function geqrf with temp workspace + +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_GEQRF_HPP_ diff --git a/src/blas/KokkosBlas_unmqr.hpp b/src/blas/KokkosBlas_unmqr.hpp new file mode 100644 index 0000000000..a6fe78d3e2 --- /dev/null +++ b/src/blas/KokkosBlas_unmqr.hpp @@ -0,0 +1,257 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOSBLAS_UNMQR_HPP_ +#define KOKKOSBLAS_UNMQR_HPP_ + +/// \file KokkosBlas_unmqr.hpp + +#include "KokkosKernels_Macros.hpp" +#include "KokkosBlas_unmqr_spec.hpp" +#include "KokkosKernels_helpers.hpp" +#include +#include + +namespace KokkosBlas { + +/// \brief Multiply rectangular matrix C by Q or Q^H (where Q is the unitary +/// output of QR by geqrf or geqp3) + +/// \tparam AViewType Input matrix M-by-k matrix , as a 2-D Kokkos::View +/// \tparam CViewType Input (RHS)/Output (Solution) M-by-N matrix, as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View +/// \tparam WViewType Input Workspace, as a 1-D Kokkos::View +/// +/// \param side [in] "L" or "l" indicates matrix Q is applied on the left of C +/// "R" or "r" indicates matrix Q is applied on the right of C +/// \param transpose [in] Specifies what op does to Q: +// "N" or "n" for non-transpose, +// "T" or "t" for transpose +/// \param k [in] Number of elementary reflectors that define Q +/// \param A [in] Input matrix, as a 2-D Kokkos::View, output of geqrf or +/// geqp3. \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors +/// of reflectors. \param C [in,out] Input/Output matrix, as a 2-D Kokkos::View +/// On entry, M-by-N matrix +/// On exit, overwritten with the solution. +/// \param workspace [in] Input vector, as a 1-D Kokkos::View. Scratchspace for +/// calculations. + +template +void unmqr(const char side[], const char trans[], int k, AViewType& A, + TauViewType& tau, CViewType& C, WViewType& workspace) { +#if (KOKKOSKERNELS_DEBUG_LEVEL > 0) + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::unmqr: A must be a Kokkos::View"); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::unmqr: tau must be a Kokkos::View"); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::unmqr: C must be a Kokkos::View"); + static_assert(Kokkos::Impl::is_view::value, + "KokkosBlas::unmqr: workspace must be a Kokkos::View"); + + static_assert(static_cast(AViewType::rank) == 2, + "KokkosBlas::unmqr: A must have rank 2"); + static_assert(static_cast(TauViewType::rank) == 1, + "KokkosBlas::unmqr: Tau must have rank 1"); + static_assert(static_cast(CViewType::rank) == 2, + "KokkosBlas::unmqr: C must have rank 2"); + static_assert(static_cast(WViewType::rank) == 1, + "KokkosBlas::unmqr: Workspace must have rank 1"); + + // Check validity of side argument + bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || + (side[0] == 'r'); + + bool valid_trans = (trans[0] == 'T') || (trans[0] == 't') || + (trans[0] == 'C') || (trans[0] == 'c') || + (trans[0] == 'N') || (trans[0] == 'n'); + + if (!(valid_side)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: side[0] = '" << side[0] << "'. " + << "Valid values include 'L' or 'l' (Left), 'R' or 'r' (Right)."; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + if (!(valid_trans)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: trans[0] = '" << trans[0] << "'. " + << "Valid values include 'T' or 't' (Transpose), 'N' or 'n' (No " + "transpose)."; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + + int64_t A0 = A.extent(0); // M if 'L', N if 'R' + int64_t A1 = A.extent(1); // > k + int64_t C0 = C.extent(0); // M + int64_t C1 = C.extent(1); // N + int64_t tau0 = tau.extent(0); + + // Check validity of Tau + if (tau0 < k) { + std::ostringstream os; + os << "KokkosBlas::unmqr: Dimensions of tau and k do not match (require " + "len(tau) >=k ): " + << "k: " << k << "Tau: " << tau0; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + + // Check validity of k + if ((side[0] == 'L') || (side[0] == 'l')) { + if ((k > C0) || (k < 0)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: Number of reflectors k must not exceed M. " + << "M: " << C0 << " " + << "k: " << k; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + if ((A0 != C0)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: A must be of size M x k: " + << "A: " << A0 << " x " << A1 << " " + << "M: " << C0; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + } else { + if ((k > C1) || (k < 0)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: Number of reflectors k must not exceed N. " + << "N: " << C1 << " " + << "k: " << k; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + if ((A0 != C1)) { + std::ostringstream os; + os << "KokkosBlas::unmqr: A must be of size N x k: " + << "A: " << A0 << " x " << A1 << " " + << "N: " << C1; + Kokkos::Impl::throw_runtime_exception(os.str()); + } + } +#endif // KOKKOSKERNELS_DEBUG_LEVEL > 0 + + // return if degenerate matrix provided + if ((A.extent(0) == 0) || (A.extent(1) == 0)) return; + if ((C.extent(0) == 0) || (C.extent(1) == 0)) return; + if ((k == 0)) return; + + // standardize particular View specializations + typedef Kokkos::View< + typename AViewType::const_value_type**, typename AViewType::array_layout, + typename AViewType::device_type, Kokkos::MemoryTraits > + AVT; + + typedef Kokkos::View > + TVT; + + typedef Kokkos::View > + CVT; + + typedef Kokkos::View > + WVT; + + AVT A_i = A; + TVT tau_i = tau; + CVT C_i = C; + WVT W_i = workspace; + + typedef KokkosBlas::Impl::UNMQR impl_type; + impl_type::unmqr(side[0], trans[0], k, A_i, tau_i, C_i, W_i); + +} // function unmqr + +template +int64_t unmqr_workspace(const char side[], const char trans[], int k, + AViewType& A, TauViewType& tau, CViewType& C) { + // return if degenerate matrix provided + if ((A.extent(0) == 0) || (A.extent(1) == 0)) return 0; + if ((C.extent(0) == 0) || (C.extent(1) == 0)) return 0; + if ((k == 0)) return 0; + + // standardize particular View specializations + typedef Kokkos::View< + typename AViewType::const_value_type**, typename AViewType::array_layout, + typename AViewType::device_type, Kokkos::MemoryTraits > + AVT; + + typedef Kokkos::View > + TVT; + + typedef Kokkos::View > + CVT; + + AVT A_i = A; + TVT tau_i = tau; + CVT C_i = C; + + typedef KokkosBlas::Impl::UNMQR_WORKSPACE impl_type; + return impl_type::unmqr_workspace(side[0], trans[0], k, A_i, tau_i, C_i); + +} // function unmqr_workspace + +template +void unmqr(const char side[], const char trans[], int k, AViewType& A, + TauViewType& tau, CViewType& C) { + int64_t lwork = unmqr_workspace(side, trans, k, A, tau, C); + TauViewType workspace("KokkosBlas::temporary_geqrf_workspace", lwork); + unmqr(side, trans, k, A, tau, C, workspace); +} // function unmqr with temp workspace + +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_UNMQR_HPP_ \ No newline at end of file diff --git a/src/blas/impl/KokkosBlas_geqrf_impl.hpp b/src/blas/impl/KokkosBlas_geqrf_impl.hpp new file mode 100644 index 0000000000..6c8a8b9246 --- /dev/null +++ b/src/blas/impl/KokkosBlas_geqrf_impl.hpp @@ -0,0 +1,34 @@ +#ifndef KOKKOSBLAS_IMPL_GEQRF_HPP_ +#define KOKKOSBLAS_IMPL_GEQRF_HPP_ + +#include +#include +#include +#include +#include + +namespace KokkosBlas { +namespace Impl { +// Put non TPL implementation here + +template +void execute_geqrf(AVT& A, TVT& tau, WVT& C) { + std::ostringstream os; + os << "There is no ETI implementation of GEQRF. Compile with TPL (LAPACKE or " + "CUSOLVER).\n"; + Kokkos::Impl::throw_runtime_exception(os.str()); +} + +template +int64_t execute_geqrf_workspace(AVT& A, TVT& tau) { + std::ostringstream os; + os << "There is no ETI implementation of GEQRF (Workspace Query). Compile " + "with TPL (LAPACKE or CUSOLVER).\n"; + Kokkos::Impl::throw_runtime_exception(os.str()); + return 0; +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_IMPL_GEQRF_HPP_ diff --git a/src/blas/impl/KokkosBlas_geqrf_spec.hpp b/src/blas/impl/KokkosBlas_geqrf_spec.hpp new file mode 100644 index 0000000000..e5dfef8ced --- /dev/null +++ b/src/blas/impl/KokkosBlas_geqrf_spec.hpp @@ -0,0 +1,155 @@ +#ifndef KOKKOSBLAS_GEQRF_SPEC_HPP_ +#define KOKKOSBLAS_GEQRF_SPEC_HPP_ + +#include "KokkosKernels_config.h" +#include "Kokkos_Core.hpp" + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include "KokkosBlas_geqrf_impl.hpp" +#endif + +namespace KokkosBlas { +namespace Impl { + +template +struct geqrf_eti_spec_avail { + enum : bool { value = false }; +}; + +template +struct geqrf_workspace_eti_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosBlas + +#define KOKKOSBLAS_GEQRF_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_eti_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_AVAIL( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct geqrf_workspace_eti_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#include +#include +#include + +namespace KokkosBlas { +namespace Impl { +// Unification Layer + +template ::value, + bool eti_spec_avail = geqrf_eti_spec_avail::value> +struct GEQRF { + static void geqrf(AVT& A, TVT& tau, WVT& workspace); +}; + +template ::value, + bool eti_spec_avail = geqrf_workspace_eti_spec_avail::value> +struct GEQRF_WORKSPACE { + static int64_t geqrf_workspace(AVT& A, TVT& tau); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// specialization layer for no TPL +template +struct GEQRF { + static void geqrf(AVT& A, TVT& tau, WVT& workspace) { + execute_geqrf(A, tau, workspace); + } +}; +#endif + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// specialization layer for no TPL +template +struct GEQRF_WORKSPACE { + static int64_t geqrf_workspace(AVT& A, TVT& tau) { + return execute_geqrf_workspace(A, tau); + } +}; +#endif + +} // namespace Impl +} // namespace KokkosBlas + +#define KOKKOSBLAS_GEQRF_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_DECL( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_GEQRF_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_INST( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#include +#include +#include + +#endif // KOKKOSBLAS_IMPL_GEQRF_HPP_ diff --git a/src/blas/impl/KokkosBlas_unmqr_impl.hpp b/src/blas/impl/KokkosBlas_unmqr_impl.hpp new file mode 100644 index 0000000000..63d7d425a8 --- /dev/null +++ b/src/blas/impl/KokkosBlas_unmqr_impl.hpp @@ -0,0 +1,36 @@ +#ifndef KOKKOSBLAS_IMPL_UNMQR_HPP_ +#define KOKKOSBLAS_IMPL_UNMQR_HPP_ + +#include +#include +#include +#include +#include + +namespace KokkosBlas { +namespace Impl { +// Put non TPL implementation here + +template +void execute_unmqr(char side, char trans, int k, AVT& A, TVT& tau, CVT& C, + WVT& workspace) { + std::ostringstream os; + os << "There is no ETI implementation of UNMQR. Compile with TPL (LAPACKE or " + "CUSOLVER).\n"; + Kokkos::Impl::throw_runtime_exception(os.str()); +} + +template +int64_t execute_unmqr_workspace(char side, char trans, int k, AVT& A, TVT& tau, + CVT& C) { + std::ostringstream os; + os << "There is no ETI implementation of UNMQR Workspace. Compile with TPL " + "(LAPACKE or CUSOLVER).\n"; + Kokkos::Impl::throw_runtime_exception(os.str()); + return 0; +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_IMPL_UNMQR_HPP_ diff --git a/src/blas/impl/KokkosBlas_unmqr_spec.hpp b/src/blas/impl/KokkosBlas_unmqr_spec.hpp new file mode 100644 index 0000000000..dec49127f6 --- /dev/null +++ b/src/blas/impl/KokkosBlas_unmqr_spec.hpp @@ -0,0 +1,176 @@ +#ifndef KOKKOSBLAS_UNMQR_SPEC_HPP_ +#define KOKKOSBLAS_UNMQR_SPEC_HPP_ + +#include "KokkosKernels_config.h" +#include "Kokkos_Core.hpp" + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +#include "KokkosBlas_unmqr_impl.hpp" +#endif + +namespace KokkosBlas { +namespace Impl { + +template +struct unmqr_eti_spec_avail { + enum : bool { value = false }; +}; + +template +struct unmqr_workspace_eti_spec_avail { + enum : bool { value = false }; +}; + +} // namespace Impl +} // namespace KokkosBlas + +#define KOKKOSBLAS_UNMQR_ETI_SPEC_AVAIL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct unmqr_eti_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_AVAIL( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template <> \ + struct unmqr_workspace_eti_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#include +#include +#include + +namespace KokkosBlas { +namespace Impl { +// Unification Layer + +template ::value, + bool eti_spec_avail = unmqr_eti_spec_avail::value> +struct UNMQR { + static void unmqr(const char side, const char trans, int k, AVT& A, TVT& tau, + CVT& C, WVT& workspace); +}; + +template < + class AVT, class TVT, class CVT, + bool tpl_spec_avail = unmqr_workspace_tpl_spec_avail::value, + bool eti_spec_avail = unmqr_workspace_eti_spec_avail::value> +struct UNMQR_WORKSPACE { + static int64_t unmqr_workspace(const char side, const char trans, int k, + AVT& A, TVT& tau, CVT& C); +}; + +#if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY +// specialization layer for no TPL +template +struct UNMQR { + static void unmqr(const char side, const char trans, int k, AVT& A, TVT& tau, + CVT& C, WVT& workspace) { + execute_unmqr(side, trans, k, A, tau, C, workspace); + } +}; + +template +struct UNMQR_WORKSPACE { + static int64_t unmqr_workspace(const char side, const char trans, int k, + AVT& A, TVT& tau, CVT& C) { + return execute_unmqr_workspace(side, trans, k, A, tau, C); + } +}; +#endif + +} // namespace Impl +} // namespace KokkosBlas + +#define KOKKOSBLAS_UNMQR_ETI_SPEC_DECL(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_DECL( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + extern template struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_UNMQR_ETI_SPEC_INST(SCALAR_TYPE, LAYOUT_TYPE, \ + EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#define KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_INST( \ + SCALAR_TYPE, LAYOUT_TYPE, EXEC_SPACE_TYPE, MEM_SPACE_TYPE) \ + template struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + false, true>; + +#include +#include +#include + +#endif // KOKKOSBLAS_IMPL_UNMQR_HPP_ diff --git a/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..dcb1871f08 --- /dev/null +++ b/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas_geqrf_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS_GEQRF_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_workspace_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_workspace_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..dcb1871f08 --- /dev/null +++ b/src/impl/generated_specializations_cpp/geqrf/KokkosBlas_geqrf_workspace_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas_geqrf_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS_GEQRF_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..b369d5d2ac --- /dev/null +++ b/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas_unmqr_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS_UNMQR_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_workspace_eti_spec_inst.cpp.in b/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_workspace_eti_spec_inst.cpp.in new file mode 100644 index 0000000000..0e1b62a163 --- /dev/null +++ b/src/impl/generated_specializations_cpp/unmqr/KokkosBlas_unmqr_workspace_eti_spec_inst.cpp.in @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#define KOKKOSKERNELS_IMPL_COMPILE_LIBRARY true +#include "KokkosKernels_config.h" +#include "KokkosBlas_unmqr_spec.hpp" + +namespace KokkosBlas { +namespace Impl { +@BLAS_UNMQR_WORKSPACE_ETI_INST_BLOCK@ + } //IMPL +} //Kokkos diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..b11228f1ba --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSBLAS_GEQRF_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_GEQRF_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_GEQRF_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_GEQRF_ETI_SPEC_AVAIL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..74941b4764 --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_eti_spec_decl.hpp.in @@ -0,0 +1,54 @@ +#ifndef KOKKOSBLAS_GEQRF_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS_GEQRF_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_GEQRF_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_GEQRF_ETI_SPEC_DECL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..a547dabe9e --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_GEQRF_WORKSPACE_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_AVAIL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..0fd5e8c15b --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_geqrf_workspace_eti_spec_decl.hpp.in @@ -0,0 +1,54 @@ +#ifndef KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_GEQRF_WORKSPACE_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_GEQRF_WORKSPACE_ETI_SPEC_DECL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..a939b9962f --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSBLAS_UNMQR_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_UNMQR_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_UNMQR_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_UNMQR_ETI_SPEC_AVAIL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..96df7b1f56 --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_eti_spec_decl.hpp.in @@ -0,0 +1,54 @@ +#ifndef KOKKOSBLAS_UNMQR_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS_UNMQR_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_UNMQR_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_UNMQR_ETI_SPEC_DECL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_avail.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_avail.hpp.in new file mode 100644 index 0000000000..531ac3a9af --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_avail.hpp.in @@ -0,0 +1,53 @@ +#ifndef KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_AVAIL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// KokkosKernels 0.9: Linear Algebra and Graph Kernels +// Copyright 2017 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_UNMQR_WORKSPACE_ETI_AVAIL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_AVAIL_HPP_ diff --git a/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_decl.hpp.in b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_decl.hpp.in new file mode 100644 index 0000000000..e3cfdc6fd6 --- /dev/null +++ b/src/impl/generated_specializations_hpp/KokkosBlas_unmqr_workspace_eti_spec_decl.hpp.in @@ -0,0 +1,54 @@ +#ifndef KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_DECL_HPP_ +#define KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_DECL_HPP_ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace KokkosBlas { +namespace Impl { + +@BLAS_UNMQR_WORKSPACE_ETI_DECL_BLOCK@ + +} // Impl +} // KokkosBlas +#endif // KOKKOSBLAS_UNMQR_WORKSPACE_ETI_SPEC_DECL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp b/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp index 9e0bff4c55..0a78a959a8 100644 --- a/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp +++ b/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp @@ -22,7 +22,29 @@ CudaBlasSingleton& CudaBlasSingleton::singleton() { } // namespace Impl } // namespace KokkosBlas -#endif // defined (KOKKOSKERNELS_ENABLE_TPL_CUBLAS) +#endif + +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) +#include + +namespace KokkosBlas { +namespace Impl { +CudaSolverSingleton::CudaSolverSingleton() { + auto stat = cusolverDnCreate(&handle); + if (stat != CUSOLVER_STATUS_SUCCESS) + Kokkos::abort("CUSOLVER initialization failed\n"); + + Kokkos::push_finalize_hook([&]() { cusolverDnDestroy(handle); }); +} + +CudaSolverSingleton& CudaSolverSingleton::singleton() { + static CudaSolverSingleton s; + return s; +} + +} // namespace Impl +} // namespace KokkosBlas +#endif // KOKKOS_KERNELS_ENABLE_TPL_CUSOLVER #if defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) #include @@ -44,6 +66,7 @@ MagmaSingleton& MagmaSingleton::singleton() { } // namespace Impl } // namespace KokkosBlas + #endif // defined(KOKKOSKERNELS_ENABLE_TPL_MAGMA) #endif // KOKKOSBLAS_CUDA_TPL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp new file mode 100644 index 0000000000..a543de4628 --- /dev/null +++ b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp @@ -0,0 +1,164 @@ +#ifndef KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { +namespace Impl { + +template +struct geqrf_tpl_spec_avail { + enum : bool { value = false }; +}; + +template +struct geqrf_workspace_tpl_spec_avail { + enum : bool { value = false }; +}; + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) + +#define KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, MEMSPACE) \ + template \ + struct geqrf_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, \ + MEMSPACE) \ + template \ + struct geqrf_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#if defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::HostSpace) +#endif + +#endif // if BLAS && LAPACK + +// CUSOLVER +// +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + +#define KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, MEMSPACE) \ + template \ + struct geqrf_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, \ + MEMSPACE) \ + template \ + struct geqrf_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +#endif // if CUBLAS && CUSOLVER + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp new file mode 100644 index 0000000000..4938c88236 --- /dev/null +++ b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp @@ -0,0 +1,770 @@ +#ifndef KOKKOSBLAS_GEQRF_TPL_SPEC_DECL_HPP_ +#define KOKKOSBLAS_GEQRF_TPL_SPEC_DECL_HPP_ + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) +#include "KokkosBlas_Host_tpl.hpp" +#include "KokkosLapack_Host_tpl.hpp" +#include + +namespace KokkosBlas { +namespace Impl { + +// FUNCTION + +#define KOKKOSBLAS_DGEQRF_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, double]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int lwork = workspace.extent(0); \ + HostLapack::geqrf(A_is_lr, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_SGEQRF_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK, float]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int lwork = workspace.extent(0); \ + HostLapack::geqrf(A_is_lr, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_ZGEQRF_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int lwork = workspace.extent(0); \ + HostLapack::geqrf(A_is_lr, M, N, reinterpret_cast(A.data()), \ + LDA, reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_CGEQRF_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int lwork = workspace.extent(0); \ + HostLapack::geqrf(A_is_lr, M, N, reinterpret_cast(A.data()), \ + LDA, reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +// WORKSPACE QUERIES + +#define KOKKOSBLAS_DGEQRF_WORKSPACE_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, double]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::geqrf(A_is_lr, M, N, A.data(), LDA, tau.data(), \ + &query, lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query; \ + } \ + }; + +#define KOKKOSBLAS_SGEQRF_WORKSPACE_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::geqrf[TPL_LAPACK, float]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::geqrf(A_is_lr, M, N, A.data(), LDA, tau.data(), \ + &query, lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query; \ + } \ + }; + +#define KOKKOSBLAS_ZGEQRF_WORKSPACE_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::geqrf(A_is_lr, M, N, reinterpret_cast(A.data()), \ + LDA, reinterpret_cast(tau.data()), \ + reinterpret_cast(&query), lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query.real(); \ + } \ + }; + +#define KOKKOSBLAS_CGEQRF_WORKSPACE_LAPACK(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::geqrf[TPL_LAPACK, complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::geqrf(A_is_lr, M, N, reinterpret_cast(A.data()), \ + LDA, reinterpret_cast(tau.data()), \ + reinterpret_cast(&query), lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query.real(); \ + } \ + }; + +KOKKOSBLAS_DGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_DGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_DGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_DGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_SGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_SGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_SGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_SGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_ZGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_ZGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_ZGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_ZGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_CGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_CGEQRF_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_CGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_CGEQRF_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_DGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_DGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_DGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_DGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_SGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_SGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_SGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_SGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_ZGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_ZGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_ZGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_ZGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_CGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_CGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_CGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_CGEQRF_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif // ENABLE BLAS/LAPACK + +// CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS_DGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, double]"); \ + int devinfo = 0; \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnDgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork, &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_SGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, single]"); \ + int devinfo = 0; \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + const int lwork = workspace.extent(0); \ + cusolverDnSgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork, &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_ZGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef double PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int devinfo = 0; \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + const int lwork = workspace.extent(0); \ + cusolverDnZgeqrf(s.handle, M, N, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), \ + lwork, &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_CGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int devinfo = 0; \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnCgeqrf(s.handle, M, N, reinterpret_cast(A.data()), \ + LDA, reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), lwork, \ + &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +// WORKSPACE_QUERIES + +#define KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, double]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnDgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, single]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + int lwork = 0; \ + cusolverDnSgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef double PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + int lwork = 0; \ + cusolverDnZgeqrf_bufferSize( \ + s.handle, M, N, reinterpret_cast(A.data()), LDA, \ + &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnCgeqrf_bufferSize(s.handle, M, N, \ + reinterpret_cast(A.data()), LDA, \ + &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_SGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_SGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_ZGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_ZGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_CGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_CGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif // IF CUSOLVER && CUBLAS + +#endif // KOKKOSBLAS_GEQRF_TPL_SPEC_DECL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_tpl_spec.hpp b/src/impl/tpls/KokkosBlas_tpl_spec.hpp index dd7632fd2d..1ebc30f48f 100644 --- a/src/impl/tpls/KokkosBlas_tpl_spec.hpp +++ b/src/impl/tpls/KokkosBlas_tpl_spec.hpp @@ -218,6 +218,74 @@ inline void rocblas_internal_safe_call(rocblas_status rocblasState, #endif // KOKKOSKERNELS_ENABLE_TPL_ROCBLAS +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) +#include "cuda_runtime.h" +#include "cublas_v2.h" +#include "cusolverDn.h" +#include "cusolver_common.h" + +namespace KokkosBlas { +namespace Impl { + +struct CudaSolverSingleton { + cusolverDnHandle_t handle; + + CudaSolverSingleton(); + + static CudaSolverSingleton& singleton(); +}; + +inline void cusolver_internal_error_throw(cusolverStatus_t cublasState, + const char* name, const char* file, + const int line) { + std::ostringstream out; + // out << name << " error( " << cublasGetStatusName(cublasState) + // << "): " << cublasGetStatusString(cublasState); + out << name << " error( "; + switch (cublasState) { + case CUSOLVER_STATUS_NOT_INITIALIZED: + out << "CUBLAS_STATUS_NOT_INITIALIZED): the library was not initialized."; + break; + case CUSOLVER_STATUS_ALLOC_FAILED: + out << "CUBLAS_STATUS_ALLOC_FAILED): the resource allocation failed."; + break; + case CUSOLVER_STATUS_INVALID_VALUE: + out << "CUBLAS_STATUS_INVALID_VALUE): an invalid numerical value was " + "used as an argument."; + break; + case CUSOLVER_STATUS_ARCH_MISMATCH: + out << "CUBLAS_STATUS_ARCH_MISMATCH): an absent device architectural " + "feature is required."; + break; + case CUSOLVER_STATUS_EXECUTION_FAILED: + out << "CUBLAS_STATUS_EXECUTION_FAILED): the GPU program failed to " + "execute."; + break; + case CUSOLVER_STATUS_INTERNAL_ERROR: + out << "CUBLAS_STATUS_INTERNAL_ERROR): an internal operation failed."; + break; + case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: + out << "CUBLAS_STATUS_NOT_SUPPORTED): the feature required is not " + "supported."; + break; + default: out << "unrecognized error code): this is bad!"; break; + } + if (file) { + out << " " << file << ":" << line; + } + throw std::runtime_error(out.str()); +} + +// The macro below defines the interface for the safe cusolver calls. +// The functions themselves are protected by impl namespace and this +// is not meant to be used by external application or libraries. +#define KOKKOS_CUSOLVER_SAFE_CALL_IMPL(call) \ + KokkosBlas::Impl::cusolver_internal_safe_call(call, #call, __FILE__, __LINE__) + +} // namespace Impl +} // namespace KokkosBlas +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER + // If LAPACK TPL is enabled, it is preferred over magma's LAPACK #ifdef KOKKOSKERNELS_ENABLE_TPL_MAGMA #include "magma_v2.h" diff --git a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp new file mode 100644 index 0000000000..a25cc1a51d --- /dev/null +++ b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp @@ -0,0 +1,143 @@ +#ifndef KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ +#define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ + +namespace KokkosBlas { + namespace Impl { + + template + struct unmqr_tpl_spec_avail { + enum : bool {value = false}; + }; + + template + struct unmqr_workspace_tpl_spec_avail { + enum : bool {value = false}; + }; + + //Hostspace LAPACKE(netlib) or MKL + //TODO: Check if these have the same syntax + + #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) + + #define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > {enum : bool {value = true}; }; + + #define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > {enum : bool {value = true}; }; + + #if defined (KOKKOSKERNELS_INST_DOUBLE)\ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_DOUBLE)\ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_FLOAT) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + #endif + + #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ + && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) + #endif + + #endif //if BLAS && LAPACK + + + //CUSOLVER + // + #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + + #define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > {enum : bool {value = true}; }; + + + #define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > \ + > {enum : bool {value = true}; }; + + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + + KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) + + #endif //if CUBLAS && CUSOLVER + + } //namespace Impl +} //namespace KokkosBlas + +#endif // KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp new file mode 100644 index 0000000000..e2dfc66a9e --- /dev/null +++ b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp @@ -0,0 +1,1089 @@ +#ifndef KOKKOSBLAS_UNMQR_TPL_SPEC_DECL_HPP_ +#define KOKKOSBLAS_UNMQR_TPL_SPEC_DECL_HPP_ + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) +#include "KokkosBlas_Host_tpl.hpp" +#include "KokkosLapack_Host_tpl.hpp" +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS_DUNMQR_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, double]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + const int lwork = workspace.extent(0); \ + HostLapack::unmqr(A_is_lr, side, trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, workspace.data(), \ + lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_SUNMQR_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::unmqr[TPL_LAPACK, float]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + const int lwork = workspace.extent(0); \ + char ctrans = (side == 'T' || side == 't') ? 'C' : side; \ + HostLapack::unmqr(A_is_lr, side, trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, workspace.data(), \ + lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_ZUNMQR_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + const int lwork = workspace.extent(0); \ + char ctrans = (trans == 'T' || trans == 't') ? 'C' : trans; \ + HostLapack::unmqr(A_is_lr, side, ctrans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_CUNMQR_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + const int lwork = workspace.extent(0); \ + char ctrans = (trans == 'T' || trans == 't') ? 'C' : trans; \ + HostLapack::unmqr(A_is_lr, side, ctrans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), lwork); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +// WORKSPACE QUERIES + +#define KOKKOSBLAS_DUNMQR_WORKSPACE_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, double]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::unmqr(A_is_lr, side, trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, &query, lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query; \ + } \ + }; + +#define KOKKOSBLAS_SUNMQR_WORKSPACE_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion("KokkosLapack::unmqr[TPL_LAPACK, float]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + int lwork = -1; \ + SCALAR query = 0; \ + HostLapack::unmqr(A_is_lr, side, trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, &query, lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query; \ + } \ + }; + +#define KOKKOSBLAS_ZUNMQR_WORKSPACE_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + int lwork = -1; \ + SCALAR query = 0; \ + char ctrans = (trans == 'T' || trans == 't') ? 'C' : trans; \ + HostLapack::unmqr(A_is_lr, side, ctrans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(&query), lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query.real(); \ + } \ + }; + +#define KOKKOSBLAS_CUNMQR_WORKSPACE_LAPACK(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef std::complex S2; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosLapack::unmqr[TPL_LAPACK, complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + int lwork = -1; \ + SCALAR query = 0; \ + char ctrans = (trans == 'T' || trans == 't') ? 'C' : trans; \ + HostLapack::unmqr(A_is_lr, side, ctrans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(&query), lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)query.real(); \ + } \ + }; + +KOKKOSBLAS_DUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_DUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_DUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_DUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_SUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_SUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_SUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_SUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_ZUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_ZUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_ZUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_ZUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_CUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_CUNMQR_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_CUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_CUNMQR_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, false) + +KOKKOSBLAS_DUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_DUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_DUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_DUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_SUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_SUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_SUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_SUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_ZUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_ZUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_ZUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_ZUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +KOKKOSBLAS_CUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, true) +KOKKOSBLAS_CUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace, false) +KOKKOSBLAS_CUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, true) +KOKKOSBLAS_CUNMQR_WORKSPACE_LAPACK(Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace, + false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif // ENABLE BLAS/LAPACK + +// CUSOLVER + +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSOLVER +#include + +namespace KokkosBlas { +namespace Impl { + +#define KOKKOSBLAS_DUNMQR_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, double]"); \ + int devinfo = 0; \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_T \ + : CUBLAS_OP_N; \ + const int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnDormqr(s.handle, m_side, m_trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, workspace.data(), lwork, \ + &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_SUNMQR_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, single]"); \ + int devinfo = 0; \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_T \ + : CUBLAS_OP_N; \ + const int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnSormqr(s.handle, m_side, m_trans, M, N, k, A.data(), LDA, \ + tau.data(), C.data(), LDC, workspace.data(), lwork, \ + &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_ZUNMQR_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef double PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, Kokkos::complex]"); \ + int devinfo = 0; \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_C \ + : CUBLAS_OP_N; \ + const int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnZunmqr(s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), \ + LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), \ + lwork, &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +#define KOKKOSBLAS_CUNMQR_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct UNMQR**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void unmqr(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, Kokkos::complex]"); \ + int devinfo = 0; \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_C \ + : CUBLAS_OP_N; \ + const int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnCunmqr(s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), lwork, \ + &devinfo); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +// WORKSPACE QUERIES + +#define KOKKOSBLAS_DUNMQR_WORKSPACE_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, double]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_T \ + : CUBLAS_OP_N; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnDormqr_bufferSize(s.handle, m_side, m_trans, M, N, k, \ + A.data(), LDA, tau.data(), C.data(), LDC, \ + &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_SUNMQR_WORKSPACE_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, single]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_T \ + : CUBLAS_OP_N; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnSormqr_bufferSize(s.handle, m_side, m_trans, M, N, k, \ + A.data(), LDA, tau.data(), C.data(), LDC, \ + &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_ZUNMQR_WORKSPACE_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef double PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, Kokkos::complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_C \ + : CUBLAS_OP_N; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnZunmqr_bufferSize( \ + s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(LAYOUTA, LAYOUTB, LAYOUTC, \ + MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct UNMQR_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTB, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View**, LAYOUTC, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + CViewType; \ + \ + static int64_t unmqr_workspace(char side, char trans, int k, AViewType& A, \ + TauViewType& tau, CViewType& C) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::unmqr[TPL_CUSOLVER, Kokkos::complex]"); \ + int M = C.extent(0); \ + int N = C.extent(1); \ + bool A_is_lr = std::is_same::value; \ + bool C_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + const int CST = C_is_lr ? C.stride(0) : C.stride(1), \ + LDC = CST == 0 ? 1 : CST; \ + cublasSideMode_t m_side = \ + (side == 'L' || side == 'l') ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT; \ + cublasOperation_t m_trans = \ + (trans == 'T' || trans == 't' || trans == 'C' || trans == 'c') \ + ? CUBLAS_OP_C \ + : CUBLAS_OP_N; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + cusolverDnCunmqr_bufferSize( \ + s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, &lwork); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_SUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_SUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_ZUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_ZUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_CUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, true) +KOKKOSBLAS_CUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, false) + +KOKKOSBLAS_DUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_DUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_SUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_SUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_ZUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_ZUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + true) +KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace, + false) + +} // namespace Impl +} // namespace KokkosBlas + +#endif // IF CUSOLVER && CUBLAS + +#endif // KOKKOSBLAS_UNMQR_TPL_SPEC_DECL_HPP_ diff --git a/src/impl/tpls/KokkosLapack_Host_tpl.cpp b/src/impl/tpls/KokkosLapack_Host_tpl.cpp new file mode 100644 index 0000000000..f133afac1a --- /dev/null +++ b/src/impl/tpls/KokkosLapack_Host_tpl.cpp @@ -0,0 +1,322 @@ +#ifndef LAPACK_HOST_TPL_CPP +#define LAPACK_HOST_TPL_CPP + +#include "KokkosKernels_config.h" + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) + +#include "KokkosBlas_Host_tpl.hpp" +#include "KokkosLapack_Host_tpl.hpp" +#include +#include + +namespace KokkosBlas { +namespace Impl { + +// float + +template <> +void HostLapack::geqp3(bool matrix_layout, int m, int n, float* a, + int lda, int* jpvt, float* tau) { + if (matrix_layout) { + LAPACKE_sgeqp3(LAPACK_ROW_MAJOR, m, n, a, lda, jpvt, tau); + } else { + LAPACKE_sgeqp3(LAPACK_COL_MAJOR, m, n, a, lda, jpvt, tau); + } +} + +template <> +void HostLapack::geqrf(bool matrix_layout, int m, int n, float* a, + int lda, float* tau, float* work, int lwork) { + if (matrix_layout) { + LAPACKE_sgeqrf_work(LAPACK_ROW_MAJOR, m, n, a, lda, tau, work, lwork); + } else { + LAPACKE_sgeqrf_work(LAPACK_COL_MAJOR, m, n, a, lda, tau, work, lwork); + } +} + +template <> +void HostLapack::unmqr(bool matrix_layout, char side, char trans, int m, + int n, int k, const float* a, int lda, + const float* tau, float* c, int ldc, float* work, + int lwork) { + if (matrix_layout) { + LAPACKE_sormqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } else { + LAPACKE_sormqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } +} + +template <> +void HostLapack::ormqr(bool matrix_layout, char side, char trans, int m, + int n, int k, const float* a, int lda, + const float* tau, float* c, int ldc, float* work, + int lwork) { + if (matrix_layout) { + LAPACKE_sormqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } else { + LAPACKE_sormqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } +} + +template <> +void HostLapack::potrf(bool matrix_layout, char uplo, int n, float* a, + int lda) { + if (matrix_layout) { + LAPACKE_spotrf(LAPACK_ROW_MAJOR, uplo, n, a, lda); + } else { + LAPACKE_spotrf(LAPACK_COL_MAJOR, uplo, n, a, lda); + } +} + +// double + +template <> +void HostLapack::geqp3(bool matrix_layout, int m, int n, double* a, + int lda, int* jpvt, double* tau) { + if (matrix_layout) { + LAPACKE_dgeqp3(LAPACK_ROW_MAJOR, m, n, a, lda, jpvt, tau); + } else { + LAPACKE_dgeqp3(LAPACK_COL_MAJOR, m, n, a, lda, jpvt, tau); + } +} + +template <> +void HostLapack::geqrf(bool matrix_layout, int m, int n, double* a, + int lda, double* tau, double* work, int lwork) { + if (matrix_layout) { + LAPACKE_dgeqrf_work(LAPACK_ROW_MAJOR, m, n, a, lda, tau, work, lwork); + } else { + LAPACKE_dgeqrf_work(LAPACK_COL_MAJOR, m, n, a, lda, tau, work, lwork); + } +} + +template <> +void HostLapack::unmqr(bool matrix_layout, char side, char trans, int m, + int n, int k, const double* a, int lda, + const double* tau, double* c, int ldc, + double* work, int lwork) { + if (matrix_layout) { + LAPACKE_dormqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } else { + LAPACKE_dormqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } +} + +template <> +void HostLapack::ormqr(bool matrix_layout, char side, char trans, int m, + int n, int k, const double* a, int lda, + const double* tau, double* c, int ldc, + double* work, int lwork) { + if (matrix_layout) { + LAPACKE_dormqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } else { + LAPACKE_dormqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, a, lda, tau, c, + ldc, work, lwork); + } +} + +template <> +void HostLapack::potrf(bool matrix_layout, char uplo, int n, double* a, + int lda) { + if (matrix_layout) { + LAPACKE_dpotrf(LAPACK_ROW_MAJOR, uplo, n, a, lda); + } else { + LAPACKE_dpotrf(LAPACK_COL_MAJOR, uplo, n, a, lda); + } +} + +// std::complex + +template <> +void HostLapack>::geqp3(bool matrix_layout, int m, int n, + std::complex* a, int lda, + int* jpvt, + std::complex* tau) { + if (matrix_layout) { + LAPACKE_cgeqp3(LAPACK_ROW_MAJOR, m, n, + reinterpret_cast<__complex__ float*>(a), lda, jpvt, + reinterpret_cast<__complex__ float*>(tau)); + } else { + LAPACKE_cgeqp3(LAPACK_COL_MAJOR, m, n, + reinterpret_cast<__complex__ float*>(a), lda, jpvt, + reinterpret_cast<__complex__ float*>(tau)); + } +} + +template <> +void HostLapack>::geqrf(bool matrix_layout, int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork) { + if (matrix_layout) { + LAPACKE_cgeqrf_work(LAPACK_ROW_MAJOR, m, n, + reinterpret_cast<__complex__ float*>(a), lda, + reinterpret_cast<__complex__ float*>(tau), + reinterpret_cast<__complex__ float*>(work), lwork); + } else { + LAPACKE_cgeqrf_work(LAPACK_COL_MAJOR, m, n, + reinterpret_cast<__complex__ float*>(a), lda, + reinterpret_cast<__complex__ float*>(tau), + reinterpret_cast<__complex__ float*>(work), lwork); + } +} + +template <> +void HostLapack>::unmqr( + bool matrix_layout, char side, char trans, int m, int n, int k, + const std::complex* a, int lda, const std::complex* tau, + std::complex* c, int ldc, std::complex* work, int lwork) { + if (matrix_layout) { + LAPACKE_cunmqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ float*>(c), ldc, + reinterpret_cast<__complex__ float*>(work), lwork); + } else { + LAPACKE_cunmqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ float*>(c), ldc, + reinterpret_cast<__complex__ float*>(work), lwork); + } +} + +template <> +void HostLapack>::ormqr( + bool matrix_layout, char side, char trans, int m, int n, int k, + const std::complex* a, int lda, const std::complex* tau, + std::complex* c, int ldc, std::complex* work, int lwork) { + if (matrix_layout) { + LAPACKE_cunmqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ float*>(c), ldc, + reinterpret_cast<__complex__ float*>(work), lwork); + } else { + LAPACKE_cunmqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ float*>(c), ldc, + reinterpret_cast<__complex__ float*>(work), lwork); + } +} + +template <> +void HostLapack>::potrf(bool matrix_layout, char uplo, + int n, std::complex* a, + int lda) { + if (matrix_layout) { + LAPACKE_cpotrf(LAPACK_ROW_MAJOR, uplo, n, + reinterpret_cast<__complex__ float*>(a), lda); + } else { + LAPACKE_cpotrf(LAPACK_COL_MAJOR, uplo, n, + reinterpret_cast<__complex__ float*>(a), lda); + } +} + +// std::complex + +template <> +void HostLapack>::geqp3(bool matrix_layout, int m, int n, + std::complex* a, int lda, + int* jpvt, + std::complex* tau) { + if (matrix_layout) { + LAPACKE_zgeqp3(LAPACK_ROW_MAJOR, m, n, + reinterpret_cast<__complex__ double*>(a), lda, jpvt, + reinterpret_cast<__complex__ double*>(tau)); + } else { + LAPACKE_zgeqp3(LAPACK_COL_MAJOR, m, n, + reinterpret_cast<__complex__ double*>(a), lda, jpvt, + reinterpret_cast<__complex__ double*>(tau)); + } +} + +template <> +void HostLapack>::geqrf(bool matrix_layout, int m, int n, + std::complex* a, int lda, + std::complex* tau, + std::complex* work, + int lwork) { + if (matrix_layout) { + LAPACKE_zgeqrf_work(LAPACK_ROW_MAJOR, m, n, + reinterpret_cast<__complex__ double*>(a), lda, + reinterpret_cast<__complex__ double*>(tau), + reinterpret_cast<__complex__ double*>(work), lwork); + } else { + LAPACKE_zgeqrf_work(LAPACK_COL_MAJOR, m, n, + reinterpret_cast<__complex__ double*>(a), lda, + reinterpret_cast<__complex__ double*>(tau), + reinterpret_cast<__complex__ double*>(work), lwork); + } +} + +template <> +void HostLapack>::unmqr( + bool matrix_layout, char side, char trans, int m, int n, int k, + const std::complex* a, int lda, const std::complex* tau, + std::complex* c, int ldc, std::complex* work, int lwork) { + if (matrix_layout) { + LAPACKE_zunmqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ double*>(c), ldc, + reinterpret_cast<__complex__ double*>(work), lwork); + } else { + LAPACKE_zunmqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ double*>(c), ldc, + reinterpret_cast<__complex__ double*>(work), lwork); + } +} + +template <> +void HostLapack>::ormqr( + bool matrix_layout, char side, char trans, int m, int n, int k, + const std::complex* a, int lda, const std::complex* tau, + std::complex* c, int ldc, std::complex* work, int lwork) { + if (matrix_layout) { + LAPACKE_zunmqr_work(LAPACK_ROW_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ double*>(c), ldc, + reinterpret_cast<__complex__ double*>(work), lwork); + } else { + LAPACKE_zunmqr_work(LAPACK_COL_MAJOR, side, trans, m, n, k, + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast<__complex__ double*>(c), ldc, + reinterpret_cast<__complex__ double*>(work), lwork); + } +} + +template <> +void HostLapack>::potrf(bool matrix_layout, char uplo, + int n, std::complex* a, + int lda) { + if (matrix_layout) { + LAPACKE_zpotrf(LAPACK_ROW_MAJOR, uplo, n, + reinterpret_cast<__complex__ double*>(a), lda); + } else { + LAPACKE_zpotrf(LAPACK_COL_MAJOR, uplo, n, + reinterpret_cast<__complex__ double*>(a), lda); + } +} + +} // namespace Impl +} // namespace KokkosBlas + +#endif // ENABLE BLAS/LAPACK + +#endif // DEF diff --git a/src/impl/tpls/KokkosLapack_Host_tpl.hpp b/src/impl/tpls/KokkosLapack_Host_tpl.hpp new file mode 100644 index 0000000000..83f8489a21 --- /dev/null +++ b/src/impl/tpls/KokkosLapack_Host_tpl.hpp @@ -0,0 +1,45 @@ +#ifndef KOKKOSLAPACK_HOST_TPL_HPP_ +#define KOKKOSLAPACK_HOST_TPL_HPP_ + +#include "KokkosKernels_config.h" +#include "Kokkos_ArithTraits.hpp" + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) +#include +#include + +namespace KokkosBlas { +namespace Impl { + +template +struct HostLapack { + typedef Kokkos::ArithTraits ats; + typedef typename ats::mag_type mag_type; + + static void potrf(bool matrix_layout, char uplo, int n, T* a, int lda); + + static void geqp3(bool matrix_layout, int m, int n, T* a, int lda, int* jpvt, + T* tau); + + static void geqrf(bool matrix_layout, int m, int n, T* a, int lda, T* tau, + T* work, int lwork); + + static void unmqr(bool matrix_layout, char side, char trans, int m, int n, + int k, const T* a, int lda, const T* tau, + /* */ T* c, int ldc, + /* */ T* work, int lwork); + + static void ormqr(bool matrix_layout, char side, char trans, int m, int n, + int k, const T* a, int lda, const T* tau, + /* */ T* c, int ldc, + /* */ T* work, int lwork); + +}; // HostLapack + +} // namespace Impl +} // namespace KokkosBlas + +#endif // ENABLE BLAS/LAPACK + +#endif // KOKKOSLAPACK_HOST_TPL_HPP_ diff --git a/unit_test/blas/Test_Blas.hpp b/unit_test/blas/Test_Blas.hpp index 642a0bf5f0..3e4a8933f8 100644 --- a/unit_test/blas/Test_Blas.hpp +++ b/unit_test/blas/Test_Blas.hpp @@ -4,6 +4,9 @@ #include "Test_Blas_gesv.hpp" #include "Test_Blas_trtri.hpp" +//LAPACK +#include "Test_Blas_qr.hpp" + // Blas 1 #include "Test_Blas1_abs.hpp" #include "Test_Blas1_asum.hpp" diff --git a/unit_test/blas/Test_Blas_qr.hpp b/unit_test/blas/Test_Blas_qr.hpp new file mode 100644 index 0000000000..540f688e09 --- /dev/null +++ b/unit_test/blas/Test_Blas_qr.hpp @@ -0,0 +1,262 @@ +#include +#include +#include +#include +#include +#include + +namespace Test { + + template + struct DiffGEMM_QR { + int N; + ViewTypeC C,C2; + + typedef typename ViewTypeC::value_type ScalarC; + typedef Kokkos::Details::ArithTraits APT; + typedef typename APT::mag_type mag_type; + + KOKKOS_INLINE_FUNCTION + void operator() (const typename Kokkos::TeamPolicy::member_type& team, mag_type& diff) const { + const int i = team.league_rank(); + mag_type diff_row = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,N), [&] (const int& j,mag_type& diff_ij) { + //printf("A (%i %i) (%i %i) (%i %i)\n",C.extent(0),C.extent(1),C2.extent(0),C2.extent(1),i,j); + diff_ij += APT::abs(C(i,j)-C2(i,j)); + //printf("B (%i %i) (%i %i) (%i %i)\n",C.extent(0),C.extent(1),C2.extent(0),C2.extent(1),i,j); + },diff_row); + Kokkos::single(Kokkos::PerTeam(team), [&] () { + diff += diff_row; + }); + } + }; + + template + struct Identity_QR { + int N; + ViewTypeC C; + + typedef typename ViewTypeC::value_type ScalarC; + typedef Kokkos::Details::ArithTraits APT; + typedef typename APT::mag_type mag_type; + + KOKKOS_INLINE_FUNCTION + void operator() (const typename Kokkos::TeamPolicy::member_type& team) const { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,N), [&] (const int& j) { + //printf("A (%i %i) (%i %i) (%i %i)\n",C.extent(0),C.extent(1),C2.extent(0),C2.extent(1),i,j); + const int i = team.league_rank(); + const ScalarC one = 1.0; + const ScalarC zero = 0.0; + if(i == j){ + C(i,j) = one; + } + else{ + C(i, j) = zero; + } + }); + } + }; + + template + struct CopyUpper_QR { + int N; + ViewTypeC C; + + typedef typename ViewTypeC::value_type ScalarC; + typedef Kokkos::Details::ArithTraits APT; + typedef typename APT::mag_type mag_type; + + KOKKOS_INLINE_FUNCTION + void operator() (const typename Kokkos::TeamPolicy::member_type& team) const { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,N), [&] (const int& j) { + //printf("A (%i %i) (%i %i) (%i %i)\n",C.extent(0),C.extent(1),C2.extent(0),C2.extent(1),i,j); + const int i = team.league_rank(); + const ScalarC zero = 0.0; + if(j < i){ + C(i,j) = zero; + } + }); + } + }; + + template + void impl_test_qr(int M, int N) { + + typedef typename ViewTypeA::device_type::execution_space execution_space; + typedef typename ViewTypeA::value_type ScalarA; + typedef Kokkos::Details::ArithTraits APT; + typedef typename APT::mag_type mag_type; + + double machine_eps = APT::epsilon(); + double eps = 10*machine_eps; + + ViewTypeA A("A",M,N); + + int minmn = M < N? M : N; + + ViewTypeA Aref("Aref", M, N); + ViewTypeT T("Tau", minmn); + ViewTypeA Q("Q", M, M); + ViewTypeA R("R", M, N); + ViewTypeA Iref("Iref", M, M); + + typename ViewTypeA::HostMirror host_A = Kokkos::create_mirror_view(Aref); + typename ViewTypeA::HostMirror host_Q = Kokkos::create_mirror_view(Q); + typename ViewTypeA::HostMirror host_Iref = Kokkos::create_mirror_view(Iref); + + uint64_t seed = Kokkos::Impl::clock_tic(); + Kokkos::Random_XorShift64_Pool rand_pool(seed); + + Kokkos::fill_random(A,rand_pool, Kokkos::rand::generator_type,ScalarA>::max()); + + //Make Copy of A + Kokkos::deep_copy(Aref, A); + + //Take QR of A + KokkosBlas::geqrf(A, T); + + //Extract upper portion of R + Kokkos::deep_copy(R, A); + struct CopyUpper_QR copy_upper; + copy_upper.C = R; + copy_upper.N = N; + Kokkos::parallel_for("KokkosBlas::Test::CopyUpper", Kokkos::TeamPolicy(M,Kokkos::AUTO,16), copy_upper); + + //Fill Iref with Identity + struct Identity_QR make_id; + make_id.C = Iref; + make_id.N = M; + Kokkos::parallel_for("KokkosBlas::Test::Identity", Kokkos::TeamPolicy(M,Kokkos::AUTO,16), make_id); + Kokkos::deep_copy(host_Iref, Iref); + + //Fill Q with Identity + Kokkos::deep_copy(Q, Iref); + + //Compute Q @ R + KokkosBlas::unmqr("L", "N", minmn, A, T, R); + + //Compare Aref with R + mag_type diff = 0; + struct DiffGEMM_QR diffgemm; + diffgemm.N = N; + diffgemm.C = Aref; + diffgemm.C2 = R; + Kokkos::parallel_reduce("KokkosBlas::Test::DiffGEMM", Kokkos::TeamPolicy(M,Kokkos::AUTO,16), diffgemm, diff); + + //Check Aref vs QR + if( N!=0 && M!=0) { + double diff_average = diff/(N*M); + // Expected Result: Random Walk in the least significant bit (i.e. ~ sqrt(K)*eps + // eps scales with the total sum and has a factor in it for the accuracy of the operations -> + // eps = K * 75 * machine_eps * 7 + double diff_expected = 5*machine_eps; + + if ( (diff_average >= diff_expected ) ) { + printf("Result: %e %e\n",diff_average,diff_expected); + } + + EXPECT_TRUE( (diff_average < diff_expected ) ); + } + + //Compute QI = Q + KokkosBlas::unmqr("L", "N", minmn, A, T, Q); + + //Compute Q^TQ = I + KokkosBlas::unmqr("L", "T", minmn, A, T, Q); + + //Check Identity + Kokkos::deep_copy(host_Q, Q); + bool test_flag = true; + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + if (APT::abs(host_Iref(i, j) - host_Q(i, j)) > eps) { + test_flag = false; + break; + } + } + if (!test_flag) break; + } + ASSERT_EQ(test_flag, true); + + //Reset + Kokkos::deep_copy(Q, Iref); + //Compute IQ = Q + KokkosBlas::unmqr("R", "N", minmn, A, T, Q); + + //Compute QQ^T = I + KokkosBlas::unmqr("R", "T", minmn, A, T, Q); + + //Check Identity + Kokkos::deep_copy(host_Q, Q); + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + if (APT::abs(host_Iref(i, j) - host_Q(i, j)) > eps) { + test_flag = false; + break; + } + } + if (!test_flag) break; + } + ASSERT_EQ(test_flag, true); + } + +} //namespace Test + + +template +int test_qr() { + +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + typedef Kokkos::View view_type_a_ll; + typedef Kokkos::View view_type_b_ll; + Test::impl_test_qr(0,0); + Test::impl_test_qr(13,15); + Test::impl_test_qr(179,15); + Test::impl_test_qr(12,323); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) + typedef Kokkos::View view_type_a_lr; + typedef Kokkos::View view_type_b_lr; + Test::impl_test_qr(0,0); + Test::impl_test_qr(13,15); + Test::impl_test_qr(179,15); + Test::impl_test_qr(12,323); +#endif + + return 1; +} + +#if defined(KOKKOSKERNELS_INST_FLOAT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F( TestCategory, qr_float ) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::qr_float"); + test_qr (); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F( TestCategory, qr_double ) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::qr_double"); + test_qr (); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_DOUBLE) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F( TestCategory, qr_complex_double ) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::qr_complex_double"); + test_qr,TestExecSpace> (); + Kokkos::Profiling::popRegion(); +} +#endif + +#if defined(KOKKOSKERNELS_INST_COMPLEX_FLOAT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +TEST_F( TestCategory, qr_complex_float ) { + Kokkos::Profiling::pushRegion("KokkosBlas::Test::qr_complex_float"); + test_qr,TestExecSpace> (); + Kokkos::Profiling::popRegion(); +} +#endif + From c1b9becd8b792c65e91c76cf1deed3c3adc919d6 Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Tue, 2 Nov 2021 18:58:49 -0500 Subject: [PATCH 2/8] Fixing merge artifacts on CHANGELOG file --- CHANGELOG.md | 7 ------- master_history.txt | 8 +------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76de9db0d0..7abfc7b730 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -199,13 +199,6 @@ - Nightly test failure: spgemm unit tests failing on White \(Power8\) [\#780](https://github.com/kokkos/kokkos-kernels/issues/780) - supernodal does not build with UVM enabled [\#633](https://github.com/kokkos/kokkos-kernels/issues/633) -## [3.1.01](https://github.com/kokkos/kokkos-kernels/tree/3.1.01) (2020-05-04) -[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.1.00...3.1.01) - -** Fixed bugs:** - -- KokkosBatched QR PR breaking nightly tests [\#691](https://github.com/kokkos/kokkos-kernels/issues/691) - ## [3.1.00](https://github.com/kokkos/kokkos-kernels/tree/3.1.00) (2020-04-14) [Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.0.00...3.1.00) diff --git a/master_history.txt b/master_history.txt index 5c63ba453d..9ce9f32bb4 100644 --- a/master_history.txt +++ b/master_history.txt @@ -7,10 +7,4 @@ tag: 2.7.24 date: 11/05/2018 master: 1a7b524b develop: fab89e37 tag: 2.8.00 date: 02/05/2019 master: a6e05e06 develop: 6a790321 tag: 2.9.00 date: 06/24/2019 master: 4ee5f3c6 develop: 094da30c tag: 3.0.00 date: 01/31/2020 master: d86db111 release-candidate-3.0: cf24ab90 -tag: 3.1.00 date: 04/14/2020 master: f199f45d develop: 8d063eae -tag: 3.1.01 date: 05/04/2020 master: 43773523 release: 6fce7502 -tag: 3.2.00 date: 08/19/2020 master: 07a60bcc release: ea3f2b77 -tag: 3.3.00 date: 12/16/2020 master: 42defc56 release: e5279e55 -tag: 3.3.01 date: 01/18/2021 master: f64b1c57 release: 4e1cc00b -tag: 3.4.00 date: 04/26/2021 master: fe439b21 release: d3c33910 -tag: 3.4.01 date: 05/20/2021 master: 564dccb3 release: 4c62eb86 +tag: 3.1.00 date: 04/14/2020 master: f199f45d develop: 8d063eae From 4135014932dc483b3a0af4e949b7411307a775b6 Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Thu, 4 Nov 2021 14:40:12 -0500 Subject: [PATCH 3/8] Adding copyright notice --- src/blas/impl/KokkosBlas_geqrf_impl.hpp | 43 +++++++++++++++++++++++++ src/blas/impl/KokkosBlas_geqrf_spec.hpp | 43 +++++++++++++++++++++++++ src/blas/impl/KokkosBlas_unmqr_impl.hpp | 43 +++++++++++++++++++++++++ src/blas/impl/KokkosBlas_unmqr_spec.hpp | 43 +++++++++++++++++++++++++ src/impl/tpls/KokkosBlas_Cuda_tpl.hpp | 43 +++++++++++++++++++++++++ src/impl/tpls/KokkosBlas_Rocm_tpl.hpp | 43 +++++++++++++++++++++++++ src/impl/tpls/KokkosLapack_Host_tpl.hpp | 43 +++++++++++++++++++++++++ 7 files changed, 301 insertions(+) diff --git a/src/blas/impl/KokkosBlas_geqrf_impl.hpp b/src/blas/impl/KokkosBlas_geqrf_impl.hpp index 6c8a8b9246..ca77e6beb3 100644 --- a/src/blas/impl/KokkosBlas_geqrf_impl.hpp +++ b/src/blas/impl/KokkosBlas_geqrf_impl.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_IMPL_GEQRF_HPP_ #define KOKKOSBLAS_IMPL_GEQRF_HPP_ diff --git a/src/blas/impl/KokkosBlas_geqrf_spec.hpp b/src/blas/impl/KokkosBlas_geqrf_spec.hpp index e5dfef8ced..168fc40b2d 100644 --- a/src/blas/impl/KokkosBlas_geqrf_spec.hpp +++ b/src/blas/impl/KokkosBlas_geqrf_spec.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_GEQRF_SPEC_HPP_ #define KOKKOSBLAS_GEQRF_SPEC_HPP_ diff --git a/src/blas/impl/KokkosBlas_unmqr_impl.hpp b/src/blas/impl/KokkosBlas_unmqr_impl.hpp index 63d7d425a8..ff1888838b 100644 --- a/src/blas/impl/KokkosBlas_unmqr_impl.hpp +++ b/src/blas/impl/KokkosBlas_unmqr_impl.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_IMPL_UNMQR_HPP_ #define KOKKOSBLAS_IMPL_UNMQR_HPP_ diff --git a/src/blas/impl/KokkosBlas_unmqr_spec.hpp b/src/blas/impl/KokkosBlas_unmqr_spec.hpp index dec49127f6..4a5d097bcd 100644 --- a/src/blas/impl/KokkosBlas_unmqr_spec.hpp +++ b/src/blas/impl/KokkosBlas_unmqr_spec.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_UNMQR_SPEC_HPP_ #define KOKKOSBLAS_UNMQR_SPEC_HPP_ diff --git a/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp b/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp index 0a78a959a8..6e9be648aa 100644 --- a/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp +++ b/src/impl/tpls/KokkosBlas_Cuda_tpl.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_CUDA_TPL_HPP_ #define KOKKOSBLAS_CUDA_TPL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_Rocm_tpl.hpp b/src/impl/tpls/KokkosBlas_Rocm_tpl.hpp index f5ba380d11..da5f33ed9b 100644 --- a/src/impl/tpls/KokkosBlas_Rocm_tpl.hpp +++ b/src/impl/tpls/KokkosBlas_Rocm_tpl.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSBLAS_ROCM_TPL_HPP_ #define KOKKOSBLAS_ROCM_TPL_HPP_ diff --git a/src/impl/tpls/KokkosLapack_Host_tpl.hpp b/src/impl/tpls/KokkosLapack_Host_tpl.hpp index 83f8489a21..0557047c9b 100644 --- a/src/impl/tpls/KokkosLapack_Host_tpl.hpp +++ b/src/impl/tpls/KokkosLapack_Host_tpl.hpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Siva Rajamanickam (srajama@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ #ifndef KOKKOSLAPACK_HOST_TPL_HPP_ #define KOKKOSLAPACK_HOST_TPL_HPP_ From 6029e1f07ce1659f3a5be74b494e13685c482f6a Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Thu, 4 Nov 2021 14:56:24 -0500 Subject: [PATCH 4/8] Adding CUDAUVMSpace specializations --- .../tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp | 26 ++ .../tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp | 34 ++ .../tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp | 394 ++++++++++++------ .../tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp | 50 +++ 4 files changed, 365 insertions(+), 139 deletions(-) diff --git a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp index a543de4628..71007b7c8f 100644 --- a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_avail.hpp @@ -134,6 +134,7 @@ KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, enum : bool { value = true }; \ }; +// CUDA Space KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, @@ -156,6 +157,31 @@ KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +// Cuda UVM Space +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_GEQRF_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_GEQRF_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + #endif // if CUBLAS && CUSOLVER } // namespace Impl diff --git a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp index 4938c88236..992742d8c4 100644 --- a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp @@ -730,6 +730,7 @@ namespace Impl { } \ }; +// CUDA Space KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, true) KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) @@ -762,6 +763,39 @@ KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +// CUDA UVM Space +KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_DGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_SGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_SGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_ZGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_ZGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_CGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_CGEQRF_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + +KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + +KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + +KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + } // namespace Impl } // namespace KokkosBlas diff --git a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp index a25cc1a51d..0b73ea5ae4 100644 --- a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp +++ b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_avail.hpp @@ -2,142 +2,258 @@ #define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ namespace KokkosBlas { - namespace Impl { - - template - struct unmqr_tpl_spec_avail { - enum : bool {value = false}; - }; - - template - struct unmqr_workspace_tpl_spec_avail { - enum : bool {value = false}; - }; - - //Hostspace LAPACKE(netlib) or MKL - //TODO: Check if these have the same syntax - - #if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) - - #define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ - template \ - struct unmqr_tpl_spec_avail< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > \ - > {enum : bool {value = true}; }; - - #define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ - template \ - struct unmqr_workspace_tpl_spec_avail< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > \ - > {enum : bool {value = true}; }; - - #if defined (KOKKOSKERNELS_INST_DOUBLE)\ - && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_FLOAT) \ - && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ - && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ - && defined (KOKKOSKERNELS_INST_LAYOUTLEFT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_DOUBLE)\ - && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_FLOAT) \ - && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) \ - && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - #endif - - #if defined (KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) \ - && defined (KOKKOSKERNELS_INST_LAYOUTRIGHT) - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::HostSpace) - #endif - - #endif //if BLAS && LAPACK - - - //CUSOLVER - // - #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) - - #define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ - template \ - struct unmqr_tpl_spec_avail< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > \ - > {enum : bool {value = true}; }; - - - #define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ - template \ - struct unmqr_workspace_tpl_spec_avail< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits > \ - > {enum : bool {value = true}; }; - - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - - KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace) - - #endif //if CUBLAS && CUSOLVER - - } //namespace Impl -} //namespace KokkosBlas - -#endif // KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ +namespace Impl { + +template +struct unmqr_tpl_spec_avail { + enum : bool { value = false }; +}; + +template +struct unmqr_workspace_tpl_spec_avail { + enum : bool { value = false }; +}; + +// Hostspace LAPACKE(netlib) or MKL +// TODO: Check if these have the same syntax + +#if defined(KOKKOSKERNELS_ENABLE_TPL_BLAS) && \ + defined(KOKKOSKERNELS_ENABLE_TPL_LAPACKE) + +#define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(SCALAR, LAYOUTA, LAYOUTB, \ + LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK( \ + SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#if defined(KOKKOSKERNELS_INST_DOUBLE) && defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTLEFT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_DOUBLE) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(double, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_FLOAT) && defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(float, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_DOUBLE_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace) +#endif + +#if defined(KOKKOSKERNELS_INST_KOKKOS_COMPLEX_FLOAT_) && \ + defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::LayoutRight, + Kokkos::HostSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_LAPACK(Kokkos::complex, + Kokkos::LayoutRight, Kokkos::LayoutRight, + Kokkos::LayoutRight, Kokkos::HostSpace) +#endif + +#endif // if BLAS && LAPACK + +// CUSOLVER +// +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSOLVER) + +#define KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(SCALAR, LAYOUTA, LAYOUTB, \ + LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +#define KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER( \ + SCALAR, LAYOUTA, LAYOUTB, LAYOUTC, MEMSPACE) \ + template \ + struct unmqr_workspace_tpl_spec_avail< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +// CUDA Space +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaSpace) + +// CUDA UVM Space +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(double, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(float, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +KOKKOSBLAS_UNMQR_WORKSPACE_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) +KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_CUSOLVER(Kokkos::complex, + Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace) + +#endif // if CUBLAS && CUSOLVER + +} // namespace Impl +} // namespace KokkosBlas + +#endif // KOKKOSBLAS_UNMQR_TPL_SPEC_AVAIL_HPP_ diff --git a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp index e2dfc66a9e..f04bedb16d 100644 --- a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp @@ -1033,6 +1033,7 @@ namespace Impl { } \ }; +// CudaSpace KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, true) KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, @@ -1081,6 +1082,55 @@ KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::CudaSpace, false) +// CUDA UVM Space +KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_DUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_SUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_SUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_ZUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_ZUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_CUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, true) +KOKKOSBLAS_CUNMQR_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, false) + +KOKKOSBLAS_DUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_DUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, + Kokkos::CudCudaUVMSpaceaSpace, false) + +KOKKOSBLAS_SUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_SUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + +KOKKOSBLAS_ZUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_ZUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + +KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + true) +KOKKOSBLAS_CUNMQR_WORKSPACE_CUSOLVER(Kokkos::LayoutLeft, Kokkos::LayoutLeft, + Kokkos::LayoutLeft, Kokkos::CudaUVMSpace, + false) + } // namespace Impl } // namespace KokkosBlas From e23717471b2b73096ec09b8fa5598a42aa45c2a0 Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Thu, 4 Nov 2021 15:08:07 -0500 Subject: [PATCH 5/8] Adding error check wrapper to UNMQR and GEQRF cusolver functions --- .../tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp | 302 +++++++++--------- .../tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp | 58 ++-- 2 files changed, 182 insertions(+), 178 deletions(-) diff --git a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp index 992742d8c4..68a47c881a 100644 --- a/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas_geqrf_tpl_spec_decl.hpp @@ -424,8 +424,9 @@ namespace Impl { const int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnDgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ - workspace.data(), lwork, &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( \ + cusolverDnDgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork, &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -467,8 +468,9 @@ namespace Impl { KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ const int lwork = workspace.extent(0); \ - cusolverDnSgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ - workspace.data(), lwork, &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( \ + cusolverDnSgeqrf(s.handle, M, N, A.data(), LDA, tau.data(), \ + workspace.data(), lwork, &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -513,29 +515,75 @@ namespace Impl { KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ const int lwork = workspace.extent(0); \ - cusolverDnZgeqrf(s.handle, M, N, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(tau.data()), \ - reinterpret_cast(workspace.data()), \ - lwork, &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf( \ + s.handle, M, N, reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), lwork, \ + &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSBLAS_CGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ +#define KOKKOSBLAS_CGEQRF_CUSOLVER(LAYOUTA, MEMSPACE, ETI_SPEC_AVAIL) \ + template \ + struct GEQRF**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + WViewType; \ + \ + static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int devinfo = 0; \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = workspace.extent(0); \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf( \ + s.handle, M, N, reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(workspace.data()), lwork, &devinfo)); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +// WORKSPACE_QUERIES + +#define KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ template \ - struct GEQRF**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef float PRECISION; \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ typedef int ORDINAL; \ typedef Kokkos::View, \ @@ -545,107 +593,63 @@ namespace Impl { Kokkos::Device, \ Kokkos::MemoryTraits > \ TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, double]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( \ + cusolverDnDgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork)); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ + }; + +#define KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef float SCALAR; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ typedef Kokkos::View, \ Kokkos::MemoryTraits > \ - WViewType; \ + TauViewType; \ \ - static void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ Kokkos::Profiling::pushRegion( \ - "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ - int devinfo = 0; \ + "KokkosBlas::geqrf[TPL_CUSOLVER, single]"); \ int M = A.extent(0); \ int N = A.extent(1); \ bool A_is_lr = std::is_same::value; \ const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ LDA = AST == 0 ? 1 : AST; \ - int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnCgeqrf(s.handle, M, N, reinterpret_cast(A.data()), \ - LDA, reinterpret_cast(tau.data()), \ - reinterpret_cast(workspace.data()), lwork, \ - &devinfo); \ + int lwork = 0; \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL( \ + cusolverDnSgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork)); \ Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ } \ }; -// WORKSPACE_QUERIES - -#define KOKKOSBLAS_DGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEQRF_WORKSPACE< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef int ORDINAL; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - TauViewType; \ - \ - static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::geqrf[TPL_CUSOLVER, double]"); \ - int M = A.extent(0); \ - int N = A.extent(1); \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - int lwork = 0; \ - KokkosBlas::Impl::CudaSolverSingleton& s = \ - KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnDgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork); \ - Kokkos::Profiling::popRegion(); \ - return (int64_t)lwork; \ - } \ - }; - -#define KOKKOSBLAS_SGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEQRF_WORKSPACE< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef float SCALAR; \ - typedef int ORDINAL; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - TauViewType; \ - \ - static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::geqrf[TPL_CUSOLVER, single]"); \ - int M = A.extent(0); \ - int N = A.extent(1); \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - KokkosBlas::Impl::CudaSolverSingleton& s = \ - KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - int lwork = 0; \ - cusolverDnSgeqrf_bufferSize(s.handle, M, N, A.data(), LDA, &lwork); \ - Kokkos::Profiling::popRegion(); \ - return (int64_t)lwork; \ - } \ - }; - #define KOKKOSBLAS_ZGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ ETI_SPEC_AVAIL) \ template \ @@ -680,54 +684,54 @@ namespace Impl { KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ int lwork = 0; \ - cusolverDnZgeqrf_bufferSize( \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZgeqrf_bufferSize( \ s.handle, M, N, reinterpret_cast(A.data()), LDA, \ - &lwork); \ + &lwork)); \ Kokkos::Profiling::popRegion(); \ return (int64_t)lwork; \ } \ }; -#define KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEQRF_WORKSPACE< \ - Kokkos::View**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef float PRECISION; \ - typedef int ORDINAL; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - TauViewType; \ - \ - static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ - int M = A.extent(0); \ - int N = A.extent(1); \ - bool A_is_lr = std::is_same::value; \ - const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ - LDA = AST == 0 ? 1 : AST; \ - int lwork = 0; \ - KokkosBlas::Impl::CudaSolverSingleton& s = \ - KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnCgeqrf_bufferSize(s.handle, M, N, \ - reinterpret_cast(A.data()), LDA, \ - &lwork); \ - Kokkos::Profiling::popRegion(); \ - return (int64_t)lwork; \ - } \ +#define KOKKOSBLAS_CGEQRF_WORKSPACE_CUSOLVER(LAYOUTA, MEMSPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEQRF_WORKSPACE< \ + Kokkos::View**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef float PRECISION; \ + typedef int ORDINAL; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + TauViewType; \ + \ + static int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::geqrf[TPL_CUSOLVER, Kokkos::complex]"); \ + int M = A.extent(0); \ + int N = A.extent(1); \ + bool A_is_lr = std::is_same::value; \ + const int AST = A_is_lr ? A.stride(0) : A.stride(1), \ + LDA = AST == 0 ? 1 : AST; \ + int lwork = 0; \ + KokkosBlas::Impl::CudaSolverSingleton& s = \ + KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCgeqrf_bufferSize( \ + s.handle, M, N, reinterpret_cast(A.data()), LDA, \ + &lwork)); \ + Kokkos::Profiling::popRegion(); \ + return (int64_t)lwork; \ + } \ }; // CUDA Space diff --git a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp index f04bedb16d..381d970d60 100644 --- a/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp +++ b/src/impl/tpls/KokkosBlas_unmqr_tpl_spec_decl.hpp @@ -590,9 +590,9 @@ namespace Impl { const int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnDormqr(s.handle, m_side, m_trans, M, N, k, A.data(), LDA, \ - tau.data(), C.data(), LDC, workspace.data(), lwork, \ - &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDormqr( \ + s.handle, m_side, m_trans, M, N, k, A.data(), LDA, tau.data(), \ + C.data(), LDC, workspace.data(), lwork, &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -652,9 +652,9 @@ namespace Impl { const int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnSormqr(s.handle, m_side, m_trans, M, N, k, A.data(), LDA, \ - tau.data(), C.data(), LDC, workspace.data(), lwork, \ - &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSormqr( \ + s.handle, m_side, m_trans, M, N, k, A.data(), LDA, tau.data(), \ + C.data(), LDC, workspace.data(), lwork, &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -717,13 +717,13 @@ namespace Impl { const int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnZunmqr(s.handle, m_side, m_trans, M, N, k, \ - reinterpret_cast(A.data()), \ - LDA, \ - reinterpret_cast(tau.data()), \ - reinterpret_cast(C.data()), LDC, \ - reinterpret_cast(workspace.data()), \ - lwork, &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZunmqr( \ + s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), lwork, \ + &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -786,12 +786,12 @@ namespace Impl { const int lwork = workspace.extent(0); \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnCunmqr(s.handle, m_side, m_trans, M, N, k, \ - reinterpret_cast(A.data()), LDA, \ - reinterpret_cast(tau.data()), \ - reinterpret_cast(C.data()), LDC, \ - reinterpret_cast(workspace.data()), lwork, \ - &devinfo); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCunmqr( \ + s.handle, m_side, m_trans, M, N, k, \ + reinterpret_cast(A.data()), LDA, \ + reinterpret_cast(tau.data()), \ + reinterpret_cast(C.data()), LDC, \ + reinterpret_cast(workspace.data()), lwork, &devinfo)); \ Kokkos::Profiling::popRegion(); \ } \ }; @@ -847,9 +847,9 @@ namespace Impl { int lwork = 0; \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnDormqr_bufferSize(s.handle, m_side, m_trans, M, N, k, \ - A.data(), LDA, tau.data(), C.data(), LDC, \ - &lwork); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnDormqr_bufferSize( \ + s.handle, m_side, m_trans, M, N, k, A.data(), LDA, tau.data(), \ + C.data(), LDC, &lwork)); \ Kokkos::Profiling::popRegion(); \ return (int64_t)lwork; \ } \ @@ -903,9 +903,9 @@ namespace Impl { int lwork = 0; \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnSormqr_bufferSize(s.handle, m_side, m_trans, M, N, k, \ - A.data(), LDA, tau.data(), C.data(), LDC, \ - &lwork); \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnSormqr_bufferSize( \ + s.handle, m_side, m_trans, M, N, k, A.data(), LDA, tau.data(), \ + C.data(), LDC, &lwork)); \ Kokkos::Profiling::popRegion(); \ return (int64_t)lwork; \ } \ @@ -962,11 +962,11 @@ namespace Impl { int lwork = 0; \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnZunmqr_bufferSize( \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnZunmqr_bufferSize( \ s.handle, m_side, m_trans, M, N, k, \ reinterpret_cast(A.data()), LDA, \ reinterpret_cast(tau.data()), \ - reinterpret_cast(C.data()), LDC, &lwork); \ + reinterpret_cast(C.data()), LDC, &lwork)); \ Kokkos::Profiling::popRegion(); \ return (int64_t)lwork; \ } \ @@ -1023,11 +1023,11 @@ namespace Impl { int lwork = 0; \ KokkosBlas::Impl::CudaSolverSingleton& s = \ KokkosBlas::Impl::CudaSolverSingleton::singleton(); \ - cusolverDnCunmqr_bufferSize( \ + KOKKOS_CUSOLVER_SAFE_CALL_IMPL(cusolverDnCunmqr_bufferSize( \ s.handle, m_side, m_trans, M, N, k, \ reinterpret_cast(A.data()), LDA, \ reinterpret_cast(tau.data()), \ - reinterpret_cast(C.data()), LDC, &lwork); \ + reinterpret_cast(C.data()), LDC, &lwork)); \ Kokkos::Profiling::popRegion(); \ return (int64_t)lwork; \ } \ From fb97b7dd095ee3abb096b2637f8ffac9bf16042b Mon Sep 17 00:00:00 2001 From: dialecticDolt Date: Thu, 4 Nov 2021 15:17:45 -0500 Subject: [PATCH 6/8] Adding additional documentation of workspace queries UNMQR and GEQRF --- src/blas/KokkosBlas_geqrf.hpp | 28 +++++++++++++++++++- src/blas/KokkosBlas_unmqr.hpp | 48 +++++++++++++++++++++++++++++++++-- 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/src/blas/KokkosBlas_geqrf.hpp b/src/blas/KokkosBlas_geqrf.hpp index 7ab409754e..7a99ea7227 100644 --- a/src/blas/KokkosBlas_geqrf.hpp +++ b/src/blas/KokkosBlas_geqrf.hpp @@ -65,7 +65,8 @@ namespace KokkosBlas { /// On entry, M-by-N matrix /// On exit, overwritten with the solution. /// \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors of -/// reflectors. \param workspace [in] Input vector, as a 1-D Kokkos::View. +/// reflectors. +/// \param workspace [in] Input vector, as a 1-D Kokkos::View. /// Scratchspace for calculations. template @@ -134,6 +135,19 @@ void geqrf(AViewType& A, TauViewType& tau, WViewType& workspace) { } // function geqrf +/// \brief Returns the required workspace of geqrf. ( Compute the QR +/// factorization of M x N matrix A. (geqrf) ) +/// +/// \return int64_t length of the required workspace +/// \tparam AViewType Input(A) / Output (Solution) M x N matrix , as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View +/// \tparam WViewType Input Workspace, as a 1-D Kokkos::View +/// +/// \param A [in] Input matrix, as a 2-D Kokkos::View. Can be +/// uninitialized for workspace queries just needs to be the correct size. +/// \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors of +/// reflectors. Can be empty for workspace queries. + template int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { // return if degenerate matrix provided @@ -160,6 +174,18 @@ int64_t geqrf_workspace(AViewType& A, TauViewType& tau) { } // function geqrf_workspace +/// \brief Compute the QR factorization of M x N matrix A. (geqrf). Allocates an +/// internal workspace. + +/// \tparam AViewType Input(A) / Output (Solution) M x N matrix , as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View +/// +/// \param A [in, out] Input matrix, as a 2-D Kokkos::View +/// On entry, M-by-N matrix +/// On exit, overwritten with the solution. +/// \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors of +/// reflectors. + template void geqrf(AViewType& A, TauViewType& tau) { int64_t lwork = geqrf_workspace(A, tau); diff --git a/src/blas/KokkosBlas_unmqr.hpp b/src/blas/KokkosBlas_unmqr.hpp index a6fe78d3e2..b9a87611d5 100644 --- a/src/blas/KokkosBlas_unmqr.hpp +++ b/src/blas/KokkosBlas_unmqr.hpp @@ -70,8 +70,10 @@ namespace KokkosBlas { // "T" or "t" for transpose /// \param k [in] Number of elementary reflectors that define Q /// \param A [in] Input matrix, as a 2-D Kokkos::View, output of geqrf or -/// geqp3. \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors -/// of reflectors. \param C [in,out] Input/Output matrix, as a 2-D Kokkos::View +/// geqp3. +/// \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors +/// of reflectors. +/// \param C [in,out] Input/Output matrix, as a 2-D Kokkos::View /// On entry, M-by-N matrix /// On exit, overwritten with the solution. /// \param workspace [in] Input vector, as a 1-D Kokkos::View. Scratchspace for @@ -209,6 +211,29 @@ void unmqr(const char side[], const char trans[], int k, AViewType& A, } // function unmqr +/// \brief Returns the length of workspace needed for unmqr (Multiply +/// rectangular matrix C by Q or Q^H (where Q is the unitary output of QR by +/// geqrf or geqp3)). + +/// \tparam AViewType Input matrix M-by-k matrix , as a 2-D Kokkos::View +/// \tparam CViewType Input (RHS)/Output (Solution) M-by-N matrix, as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View + +/// \return int64_t length of required workspace +/// \param side [in] "L" or "l" indicates matrix Q is applied on the left of C +/// "R" or "r" indicates matrix Q is applied on the right of C +/// \param transpose [in] Specifies what op does to Q: +// "N" or "n" for non-transpose, +// "T" or "t" for transpose +/// \param k [in] Number of elementary reflectors that define Q +/// \param A [in] Input matrix, as a 2-D Kokkos::View, output of geqrf or +/// geqp3. Can be empty for workspace queries, just needs to be the correct +/// size. +///\param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors +/// of reflectors. Can be empty for workspace queries. +/// \param C [in] Input/Output unmqr matrix, as a 2-D Kokkos::View. Can be empty +/// for workspace queries, just needs to be the correct size. + template int64_t unmqr_workspace(const char side[], const char trans[], int k, AViewType& A, TauViewType& tau, CViewType& C) { @@ -244,6 +269,25 @@ int64_t unmqr_workspace(const char side[], const char trans[], int k, } // function unmqr_workspace +/// \brief Multiply rectangular matrix C by Q or Q^H (where Q is the unitary +/// output of QR by geqrf or geqp3). Allocates a workspace internally. + +/// \tparam AViewType Input matrix M-by-k matrix , as a 2-D Kokkos::View +/// \tparam CViewType Input (RHS)/Output (Solution) M-by-N matrix, as a 2-D +/// Kokkos::View \tparam TauViewType Input k vector , as a 1-D Kokkos::View +/// +/// \param side [in] "L" or "l" indicates matrix Q is applied on the left of C +/// "R" or "r" indicates matrix Q is applied on the right of C +/// \param transpose [in] Specifies what op does to Q: +// "N" or "n" for non-transpose, +// "T" or "t" for transpose +/// \param k [in] Number of elementary reflectors that define Q +/// \param A [in] Input matrix, as a 2-D Kokkos::View, output of geqrf or +/// geqp3. \param tau [in] Input vector, as a 1-D Kokkos::View. Scalar factors +/// of reflectors. \param C [in,out] Input/Output matrix, as a 2-D Kokkos::View +/// On entry, M-by-N matrix +/// On exit, overwritten with the solution. + template void unmqr(const char side[], const char trans[], int k, AViewType& A, TauViewType& tau, CViewType& C) { From 3f3df37430b5de2159bedc7f4b790a3e297685c7 Mon Sep 17 00:00:00 2001 From: William Ruys Date: Wed, 8 Dec 2021 23:36:04 -0700 Subject: [PATCH 7/8] Removed parameter names in non-TPL placeholders --- src/blas/impl/KokkosBlas_geqrf_impl.hpp | 4 ++-- src/blas/impl/KokkosBlas_unmqr_impl.hpp | 7 +++---- src/impl/tpls/KokkosBlas_tpl_spec.hpp | 1 + 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/blas/impl/KokkosBlas_geqrf_impl.hpp b/src/blas/impl/KokkosBlas_geqrf_impl.hpp index ca77e6beb3..4955a4e298 100644 --- a/src/blas/impl/KokkosBlas_geqrf_impl.hpp +++ b/src/blas/impl/KokkosBlas_geqrf_impl.hpp @@ -55,7 +55,7 @@ namespace Impl { // Put non TPL implementation here template -void execute_geqrf(AVT& A, TVT& tau, WVT& C) { +void execute_geqrf(AVT& /*A*/, TVT& /*tau*/, WVT& /*C*/) { std::ostringstream os; os << "There is no ETI implementation of GEQRF. Compile with TPL (LAPACKE or " "CUSOLVER).\n"; @@ -63,7 +63,7 @@ void execute_geqrf(AVT& A, TVT& tau, WVT& C) { } template -int64_t execute_geqrf_workspace(AVT& A, TVT& tau) { +int64_t execute_geqrf_workspace(AVT& /*A*/, TVT& /*tau*/) { std::ostringstream os; os << "There is no ETI implementation of GEQRF (Workspace Query). Compile " "with TPL (LAPACKE or CUSOLVER).\n"; diff --git a/src/blas/impl/KokkosBlas_unmqr_impl.hpp b/src/blas/impl/KokkosBlas_unmqr_impl.hpp index ff1888838b..84b0df2691 100644 --- a/src/blas/impl/KokkosBlas_unmqr_impl.hpp +++ b/src/blas/impl/KokkosBlas_unmqr_impl.hpp @@ -55,8 +55,8 @@ namespace Impl { // Put non TPL implementation here template -void execute_unmqr(char side, char trans, int k, AVT& A, TVT& tau, CVT& C, - WVT& workspace) { +void execute_unmqr(char /*side*/, char /*trans*/, int /*k*/, AVT& /*A*/, TVT& /*tau*/, CVT& /*C*/, + WVT& /*workspace*/) { std::ostringstream os; os << "There is no ETI implementation of UNMQR. Compile with TPL (LAPACKE or " "CUSOLVER).\n"; @@ -64,8 +64,7 @@ void execute_unmqr(char side, char trans, int k, AVT& A, TVT& tau, CVT& C, } template -int64_t execute_unmqr_workspace(char side, char trans, int k, AVT& A, TVT& tau, - CVT& C) { +int64_t execute_unmqr_workspace(char /*side*/, char /*trans*/, int /*k*/, AVT& /*A*/, TVT& /*tau*/, CVT& /*C*/) { std::ostringstream os; os << "There is no ETI implementation of UNMQR Workspace. Compile with TPL " "(LAPACKE or CUSOLVER).\n"; diff --git a/src/impl/tpls/KokkosBlas_tpl_spec.hpp b/src/impl/tpls/KokkosBlas_tpl_spec.hpp index 1ebc30f48f..a7ff5ca93e 100644 --- a/src/impl/tpls/KokkosBlas_tpl_spec.hpp +++ b/src/impl/tpls/KokkosBlas_tpl_spec.hpp @@ -284,6 +284,7 @@ inline void cusolver_internal_error_throw(cusolverStatus_t cublasState, } // namespace Impl } // namespace KokkosBlas + #endif // KOKKOSKERNELS_ENABLE_TPL_CUSOLVER // If LAPACK TPL is enabled, it is preferred over magma's LAPACK From f98d26a4a2c79c629dbe45d0a4b1bb37c555844e Mon Sep 17 00:00:00 2001 From: William Ruys Date: Thu, 9 Dec 2021 10:41:49 -0700 Subject: [PATCH 8/8] Fix typo in LayoutRight specialization --- unit_test/blas/Test_Blas_qr.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unit_test/blas/Test_Blas_qr.hpp b/unit_test/blas/Test_Blas_qr.hpp index 540f688e09..51d262f7dd 100644 --- a/unit_test/blas/Test_Blas_qr.hpp +++ b/unit_test/blas/Test_Blas_qr.hpp @@ -219,10 +219,10 @@ int test_qr() { #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || (!defined(KOKKOSKERNELS_ETI_ONLY) && !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) typedef Kokkos::View view_type_a_lr; typedef Kokkos::View view_type_b_lr; - Test::impl_test_qr(0,0); - Test::impl_test_qr(13,15); - Test::impl_test_qr(179,15); - Test::impl_test_qr(12,323); + Test::impl_test_qr(0,0); + Test::impl_test_qr(13,15); + Test::impl_test_qr(179,15); + Test::impl_test_qr(12,323); #endif return 1;