diff --git a/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp b/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp index e1d024ef39..f915f56cd8 100644 --- a/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp +++ b/batched/dense/impl/KokkosBatched_SVD_Serial_Impl.hpp @@ -39,6 +39,12 @@ KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_USV_Tag, const AViewType &A, co Vt.stride(0), Vt.stride(1), sigma.data(), sigma.stride(0), work.data(), tol); } +// Number of entries the work buffer must hold for A: the larger of A's two extents +template +KOKKOS_INLINE_FUNCTION size_t SerialSVD::work_size(const AViewType &A) { + return Kokkos::max(A.extent_int(0), A.extent_int(1)); +} + // Version which computes only singular values template KOKKOS_INLINE_FUNCTION int SerialSVD::invoke(SVD_S_Tag, const AViewType &A, const SViewType &sigma, diff --git a/batched/dense/src/KokkosBatched_SVD_Decl.hpp b/batched/dense/src/KokkosBatched_SVD_Decl.hpp index a022d826cc..9a9ad66f8e 100644 --- a/batched/dense/src/KokkosBatched_SVD_Decl.hpp +++ b/batched/dense/src/KokkosBatched_SVD_Decl.hpp @@ -61,6 +61,10 @@ struct SerialSVD { SVD_USV_Tag, const AViewType &A, const UViewType &U, const SViewType &s, const VtViewType &Vt, const WViewType &W, typename AViewType::const_value_type tol = Kokkos::ArithTraits::zero()); + // Get the workspace size required for a given matrix. 
+ template + KOKKOS_INLINE_FUNCTION static size_t work_size(const AViewType &A); + // Version which computes only singular values template KOKKOS_INLINE_FUNCTION static int invoke( diff --git a/batched/dense/unit_test/Test_Batched_SerialSVD.hpp b/batched/dense/unit_test/Test_Batched_SerialSVD.hpp index bdfdeec276..b8d9d2ee73 100644 --- a/batched/dense/unit_test/Test_Batched_SerialSVD.hpp +++ b/batched/dense/unit_test/Test_Batched_SerialSVD.hpp @@ -497,6 +497,21 @@ Kokkos::View getTestCase(int testCase) { Ahost = MatrixHost("A5", m, n); break; } + case 6: { + m = 3; + n = 2; + Ahost = MatrixHost("A6", m, n); + + Ahost(0, 0) = -1.6175067619642277e-05; + Ahost(1, 0) = -1.6175067619642270e-05; + Ahost(2, 0) = 3.0662409276442540e-21; + + Ahost(0, 1) = 1.6175067619642277e-05; + Ahost(1, 1) = -1.6175067619642277e-05; + Ahost(2, 1) = 2.3002860307475551e-21; + + break; + } default: throw std::runtime_error("Test case out of bounds."); } Kokkos::View A(Ahost.label(), m, n); @@ -509,7 +524,7 @@ void testSpecialCases() { using Matrix = Kokkos::View; using Vector = Kokkos::View; using ExecSpace = typename Device::execution_space; - for (int i = 0; i < 6; i++) { + for (int i = 0; i < 7; i++) { Matrix A = getTestCase(i); int m = A.extent(0); int n = A.extent(1); diff --git a/docs/source/API/batched-index.rst b/docs/source/API/batched-index.rst index 14e99d5c68..045988efe7 100644 --- a/docs/source/API/batched-index.rst +++ b/docs/source/API/batched-index.rst @@ -1,6 +1,12 @@ API: Batched ============ +.. toctree:: + :maxdepth: 2 + :hidden: + + batched/dense/batched_svd_serial.rst + .. note:: Documentation for the batched algorithms is upcoming. 
diff --git a/example/wiki/CMakeLists.txt b/example/wiki/CMakeLists.txt index 1e751f5797..76211297d5 100644 --- a/example/wiki/CMakeLists.txt +++ b/example/wiki/CMakeLists.txt @@ -1,3 +1,4 @@ ADD_SUBDIRECTORY(blas) ADD_SUBDIRECTORY(sparse) ADD_SUBDIRECTORY(graph) +ADD_SUBDIRECTORY(batched) diff --git a/example/wiki/batched/CMakeLists.txt b/example/wiki/batched/CMakeLists.txt new file mode 100644 index 0000000000..9af4e69e52 --- /dev/null +++ b/example/wiki/batched/CMakeLists.txt @@ -0,0 +1 @@ +ADD_SUBDIRECTORY(dense) diff --git a/example/wiki/batched/dense/CMakeLists.txt b/example/wiki/batched/dense/CMakeLists.txt new file mode 100644 index 0000000000..e66d1f9e09 --- /dev/null +++ b/example/wiki/batched/dense/CMakeLists.txt @@ -0,0 +1,9 @@ +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +KOKKOSKERNELS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../../../../test_common) + +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_batched_dense_svd + SOURCES KokkosBatched_wiki_svd.cpp + ) diff --git a/example/wiki/batched/dense/KokkosBatched_wiki_svd.cpp b/example/wiki/batched/dense/KokkosBatched_wiki_svd.cpp new file mode 100644 index 0000000000..ebcc29354d --- /dev/null +++ b/example/wiki/batched/dense/KokkosBatched_wiki_svd.cpp @@ -0,0 +1,107 @@ +#include +#include +#include + +template +struct SVD_functor { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + + MatInfoType mat_info; + MatValuesType mat_values; + SingularValuesType S; + WorkspaceType W; + OffsetType s_offset, w_offset; + + SVD_functor(MatInfoType mat_info_, MatValuesType mat_values_, SingularValuesType S_, WorkspaceType W_, + OffsetType s_offset_, OffsetType w_offset_) + : mat_info(mat_info_), mat_values(mat_values_), S(S_), W(W_), s_offset(s_offset_), w_offset(w_offset_) {} + + void KOKKOS_FUNCTION operator()(const int matIdx) const { + Kokkos::View> A( + &mat_values(mat_info(matIdx, 0)), mat_info(matIdx, 1), 
mat_info(matIdx, 2)); + Kokkos::View> s( + &S(s_offset(matIdx)), s_offset(matIdx + 1) - s_offset(matIdx)); + Kokkos::View> w( + &W(w_offset(matIdx)), w_offset(matIdx + 1) - w_offset(matIdx)); + + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_S_Tag{}, A, s, w); + } +}; + +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + { + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + + constexpr int numMats = 2; + + // We want the following matrices in 1D storage + // using a FORTRAN or column wise ordering. + // + // A1 = [2, 1, 4] A2 = [3, 0] + // [-1, 2, -2] [4, 5] + Kokkos::View mat_values("values storage", 10); + auto mat_values_h = Kokkos::create_mirror_view(mat_values); + mat_values_h(0) = 2; + mat_values_h(2) = 1; + mat_values_h(4) = 4; + mat_values_h(1) = -1; + mat_values_h(3) = 2; + mat_values_h(5) = -2; + + mat_values_h(6) = 3; + mat_values_h(8) = 0; + mat_values_h(7) = 4; + mat_values_h(9) = 5; + Kokkos::deep_copy(mat_values, mat_values_h); + + // To help our functor extract matrices from 1D storage, + // we specify offset, numRows and numCols for each matrix. 
+ Kokkos::View mat_info("matrices info", numMats, 3); + auto mat_info_h = Kokkos::create_mirror_view(mat_info); + mat_info_h(0, 0) = 0; + mat_info_h(0, 1) = 2; + mat_info_h(0, 2) = 3; + mat_info_h(1, 0) = 6; + mat_info_h(1, 1) = 2; + mat_info_h(1, 2) = 2; + Kokkos::deep_copy(mat_info, mat_info_h); + + Kokkos::View s_offset("s offsets", 3), w_offset("w offsets", 3); + auto s_offset_h = Kokkos::create_mirror_view(s_offset); + auto w_offset_h = Kokkos::create_mirror_view(w_offset); + for (int matIdx = 0; matIdx < numMats; ++matIdx) { + s_offset_h(matIdx + 1) = s_offset_h(matIdx) + Kokkos::min(mat_info_h(matIdx, 1), mat_info_h(matIdx, 2)); + w_offset_h(matIdx + 1) = w_offset_h(matIdx) + Kokkos::max(mat_info_h(matIdx, 1), mat_info_h(matIdx, 2)); + } + Kokkos::deep_copy(s_offset, s_offset_h); + Kokkos::deep_copy(w_offset, w_offset_h); + + Kokkos::View S("singular values", 4), W("workspace", 5); + + SVD_functor svd_calculator(mat_info, mat_values, S, W, s_offset, w_offset); + Kokkos::parallel_for(numMats, svd_calculator); + + auto S_h = Kokkos::create_mirror_view(S); + Kokkos::deep_copy(S_h, S); + if (Kokkos::abs(S_h(0) - 5) > 1e-14) { + std::cout << "Large singular value of the first matrix is " << S_h(0) << " instead of 5!" << std::endl; + } + if (Kokkos::abs(S_h(1) - Kokkos::sqrt(5)) > 1e-14) { + std::cout << "Small singular value of the first matrix is " << S_h(1) << " instead of " << Kokkos::sqrt(5) << "!" + << std::endl; + } + if (Kokkos::abs(S_h(2) - Kokkos::sqrt(45)) > 1e-14) { + std::cout << "Large singular value of the second matrix is " << S_h(2) << " instead of " << Kokkos::sqrt(45) << "!" + << std::endl; + } + if (Kokkos::abs(S_h(3) - Kokkos::sqrt(5)) > 1e-14) { + std::cout << "Small singular value of the second matrix is " << S_h(3) << " instead of " << Kokkos::sqrt(5) << "!" 
+ << std::endl; + } + + std::cout << "Singular Values of the first matrix: " << S_h(0) << ", " << S_h(1) << std::endl; + std::cout << "Singular Values of the second matrix: " << S_h(2) << ", " << S_h(3) << std::endl; + } + Kokkos::finalize(); +} diff --git a/perf_test/blas/blas1/KokkosBlas_team_dot_perf_test_benchmark.cpp b/perf_test/blas/blas1/KokkosBlas_team_dot_perf_test_benchmark.cpp index e07cadd647..e9a0e298d4 100644 --- a/perf_test/blas/blas1/KokkosBlas_team_dot_perf_test_benchmark.cpp +++ b/perf_test/blas/blas1/KokkosBlas_team_dot_perf_test_benchmark.cpp @@ -79,8 +79,7 @@ struct teamDotFunctor { template static void run(benchmark::State& state) { - const auto m = state.range(0); - const auto repeat = state.range(1); + const auto m = state.range(0); // Declare type aliases using Scalar = double; using MemSpace = typename ExecSpace::memory_space;