diff --git a/.gitignore b/.gitignore index bb4134be..b58d5e03 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ .DS_Store build/* _build +LastTest.log +CTestCostData.txt install/* *~ *.swp diff --git a/CHANGELOG.md b/CHANGELOG.md index a7d57662..fd9c56fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,8 @@ - Added Developer Guide documentation for vector and matrix classes and handlers. +- Remove automatic memory allocation and memory syncing in various memory management functions and require the user to explicitly manage their own memory. + ## Changes to Re::Solve in release 0.99.2 ### Major Features diff --git a/examples/ExampleHelper.hpp b/examples/ExampleHelper.hpp index 50f5716b..7adf25c7 100644 --- a/examples/ExampleHelper.hpp +++ b/examples/ExampleHelper.hpp @@ -130,6 +130,7 @@ namespace ReSolve if (res_ == nullptr) { res_ = new ReSolve::vector::Vector(A->getNumRows()); + res_->allocate(memspace_); } else { @@ -137,6 +138,7 @@ namespace ReSolve { delete res_; res_ = new ReSolve::vector::Vector(A->getNumRows()); + res_->allocate(memspace_); } } diff --git a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp index e74b1fa5..578d49ce 100644 --- a/examples/experimental/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/experimental/r_KLU_GLU_matrix_values_update.cpp @@ -121,6 +121,7 @@ int main(int argc, char* argv[]) ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() @@ -133,10 +134,12 @@ int main(int argc, char* argv[]) // Update host and device data. if (i < 1) { + vec_rhs->allocate(ReSolve::memory::HOST); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); } else { + vec_rhs->allocate(ReSolve::memory::DEVICE); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; @@ -173,6 +176,7 @@ int main(int argc, char* argv[]) status = GLU->solve(vec_rhs, vec_x); std::cout << "CUSOLVER GLU solve status: " << status << std::endl; } + vec_r->allocate(ReSolve::memory::DEVICE); vec_r->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); diff --git a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp index 05f6ee4d..af32faae 100644 --- a/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_cusolverrf_redo_factorization.cpp @@ -117,6 +117,7 @@ int main(int argc, char* argv[]) ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() @@ -129,10 +130,12 @@ int main(int argc, char* argv[]) // Update host and device data. if (i < 2) { + vec_rhs->allocate(ReSolve::memory::HOST); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); } else { + vec_rhs->allocate(ReSolve::memory::DEVICE); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; @@ -182,6 +185,7 @@ int main(int argc, char* argv[]) } // Make sure vec_r is properly initialized before using it. + vec_r->allocate(ReSolve::memory::DEVICE); vec_r->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); diff --git a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp index 70e4298d..70c8e159 100644 --- a/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/experimental/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -107,6 +107,7 @@ int main(int argc, char* argv[]) vec_x->allocate(ReSolve::memory::HOST); // for KLU vec_x->allocate(ReSolve::memory::DEVICE); vec_r = new vector_type(A->getNumRows()); + vec_r->allocate(ReSolve::memory::DEVICE); } else { @@ -114,6 +115,7 @@ int main(int argc, char* argv[]) ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() @@ -126,11 +128,14 @@ int main(int argc, char* argv[]) // Update host and device data. if (i < 2) { + vec_rhs->allocate(ReSolve::memory::HOST); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); } else { + vec_rhs->allocate(ReSolve::memory::HOST); + vec_rhs->allocate(ReSolve::memory::DEVICE); A->setUpdated(ReSolve::memory::HOST); A->syncData(ReSolve::memory::DEVICE); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); diff --git a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp index fce5cf15..b2a48e93 100644 --- a/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp +++ b/examples/experimental/r_KLU_rocsolverrf_redo_factorization.cpp @@ -106,6 +106,7 @@ int main(int argc, char* argv[]) ReSolve::io::updateArrayFromFile(rhs_file, &rhs); } // Copy matrix data to device + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() << " x " << A->getNumColumns() @@ -118,10 +119,12 @@ int main(int argc, char* argv[]) // Update host and device data. if (i < 2) { + vec_rhs->allocate(ReSolve::memory::HOST); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); } else { + vec_rhs->allocate(ReSolve::memory::DEVICE); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); } std::cout << "CSR matrix loaded. Expanded NNZ: " << A->getNnz() << std::endl; @@ -158,6 +161,7 @@ int main(int argc, char* argv[]) } // Check accuracy of the solution + vec_rhs->allocate(ReSolve::memory::DEVICE); vec_r->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler->setValuesChanged(true, ReSolve::memory::DEVICE); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); diff --git a/examples/gluRefactor.cpp b/examples/gluRefactor.cpp index 9362540a..3a9ef4e2 100644 --- a/examples/gluRefactor.cpp +++ b/examples/gluRefactor.cpp @@ -221,7 +221,9 @@ int gluRefactor(int argc, char* argv[]) rhs_file.close(); // Copy data to device + A->allocateMatrixData(memory::DEVICE); A->syncData(memory::DEVICE); + vec_rhs->allocate(memory::DEVICE); vec_rhs->syncData(memory::DEVICE); RESOLVE_RANGE_POP("File input"); diff --git a/examples/gpuRefactor.cpp b/examples/gpuRefactor.cpp index d7d23fbc..010d9e0f 100644 --- a/examples/gpuRefactor.cpp +++ b/examples/gpuRefactor.cpp @@ -249,7 +249,9 @@ int gpuRefactor(int argc, char* argv[]) rhs_file.close(); // Copy data to device + A->allocateMatrixData(memory::DEVICE); A->syncData(memory::DEVICE); + vec_rhs->allocate(memory::DEVICE); vec_rhs->syncData(memory::DEVICE); RESOLVE_RANGE_POP("File input"); diff --git a/examples/randGmres.cpp b/examples/randGmres.cpp index 0a46d8da..442eef4c 100644 --- a/examples/randGmres.cpp +++ b/examples/randGmres.cpp @@ -162,8 +162,10 @@ int runGmresExample(int argc, char* argv[]) vec_x->allocate(memspace); if (memspace == memory::DEVICE) { - A->syncData(memspace); - vec_rhs->syncData(memspace); + A->allocateMatrixData(memory::DEVICE); + A->syncData(memory::DEVICE); + vec_rhs->allocate(memory::DEVICE); + vec_rhs->syncData(memory::DEVICE); } printSystemInfo(matrix_pathname, A); diff --git a/examples/sysGmres.cpp b/examples/sysGmres.cpp index 538510ed..c1fb4958 100644 --- a/examples/sysGmres.cpp +++ b/examples/sysGmres.cpp @@ -223,8 +223,10 @@ int sysGmres(int argc, char* argv[]) if (memspace == memory::DEVICE) { // Copy data to the device - A->syncData(memspace); - vec_rhs->syncData(memspace); + A->allocateMatrixData(memory::DEVICE); + A->syncData(memory::DEVICE); + vec_rhs->allocate(memory::DEVICE); + vec_rhs->syncData(memory::DEVICE); } mat_file.close(); diff --git a/examples/sysRefactor.cpp b/examples/sysRefactor.cpp index 6f78f4f9..e0e416fd 100644 --- a/examples/sysRefactor.cpp +++ b/examples/sysRefactor.cpp @@ -306,7 +306,9 @@ int sysRefactor(int argc, char* argv[]) // Ensure matrix data is synced to the device before any GPU operations if (hw_backend == "CUDA" || hw_backend == "HIP") { + A->allocateMatrixData(memory::DEVICE); A->syncData(memory::DEVICE); + vec_rhs->allocate(memory::DEVICE); vec_rhs->syncData(memory::DEVICE); } @@ -315,7 +317,10 @@ int sysRefactor(int argc, char* argv[]) double solve_time_ms = 0.0; - syncDevice(hw_backend); + if (hw_backend == "CUDA" || hw_backend == "HIP") + { + syncDevice(hw_backend); + } auto solve_start = std::chrono::high_resolution_clock::now(); // Now call direct solver @@ -348,7 +353,10 @@ int sysRefactor(int argc, char* argv[]) } status = solver.solve(vec_rhs, vec_x); - syncDevice(hw_backend); + if (hw_backend == "CUDA" || hw_backend == "HIP") + { + syncDevice(hw_backend); + } auto solve_end = std::chrono::high_resolution_clock::now(); solve_time_ms = std::chrono::duration(solve_end - solve_start).count(); std::cout << "Triangular solve status: " << status << std::endl; diff --git a/resolve/GramSchmidt.cpp b/resolve/GramSchmidt.cpp index c8935dcd..b5c29bc7 100644 --- a/resolve/GramSchmidt.cpp +++ b/resolve/GramSchmidt.cpp @@ -191,6 +191,10 @@ namespace ReSolve // copy H_col to aux, we will need it later vec_Hcolumn_->setDataUpdated(memspace_); vec_Hcolumn_->resize(i + 1); + if (memspace_ == memory::DEVICE) + { + vec_Hcolumn_->syncData(memory::HOST); + } vec_Hcolumn_->copyToExternal(h_aux_, 0, memory::HOST, memory::HOST); // Hcol = V(:,1:i)^T*V(:,i+1); @@ -201,6 +205,10 @@ namespace ReSolve // copy H_col to H vec_Hcolumn_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_Hcolumn_->syncData(memory::HOST); + } vec_Hcolumn_->copyToExternal(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST, memory::HOST); // add both pieces together (unstable otherwise, careful here!!) @@ -379,6 +387,10 @@ namespace ReSolve // copy H_col to H vec_Hcolumn_->setDataUpdated(memspace_); + if (memspace_ == memory::DEVICE) + { + vec_Hcolumn_->syncData(memory::HOST); + } vec_Hcolumn_->resize(i + 1); vec_Hcolumn_->copyToExternal(&H[idxmap(i, 0, num_vecs_ + 1)], 0, memory::HOST, memory::HOST); diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index bd47b492..9f314ce4 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -184,6 +184,9 @@ namespace ReSolve return error_sum; } + /** + * @brief Solve the system. Input vector data must be synced on the device. + */ int LinSolverDirectCuSolverGLU::solve(vector_type* rhs, vector_type* x) { RESOLVE_RANGE_PUSH(__FUNCTION__); diff --git a/resolve/LinSolverDirectRocSolverRf.cpp b/resolve/LinSolverDirectRocSolverRf.cpp index c39fa3a0..694bd244 100644 --- a/resolve/LinSolverDirectRocSolverRf.cpp +++ b/resolve/LinSolverDirectRocSolverRf.cpp @@ -77,6 +77,7 @@ namespace ReSolve combineFactors(L, U); M_->setUpdated(ReSolve::memory::HOST); + M_->allocateMatrixData(ReSolve::memory::DEVICE); M_->syncData(ReSolve::memory::DEVICE); // remember - P and Q are generally CPU variables diff --git a/resolve/SystemSolver.cpp b/resolve/SystemSolver.cpp index 29a82753..20c500c3 100644 --- a/resolve/SystemSolver.cpp +++ b/resolve/SystemSolver.cpp @@ -453,6 +453,8 @@ namespace ReSolve L_ = factorizationSolver_->getLFactor(); U_ = factorizationSolver_->getUFactor(); + L_->allocateMatrixData(memory::DEVICE); + U_->allocateMatrixData(memory::DEVICE); P_ = factorizationSolver_->getPOrdering(); Q_ = factorizationSolver_->getQOrdering(); @@ -862,6 +864,7 @@ namespace ReSolve else { resVector_->copyFromExternal(rhs, memory::HOST, memory::DEVICE); + resVector_->syncData(memory::HOST); norm_b = std::sqrt(vectorHandler_->dot(resVector_, resVector_, memory::HOST)); // ms = memory::HOST; } diff --git a/resolve/hykkt/HyKKTSolver.cpp b/resolve/hykkt/HyKKTSolver.cpp index d4d4eb05..1a37685c 100644 --- a/resolve/hykkt/HyKKTSolver.cpp +++ b/resolve/hykkt/HyKKTSolver.cpp @@ -57,56 +57,6 @@ namespace ReSolve delete sccg_; } - /* - * @brief loads KKT system into solver. Note: matrices and vectors, - * including the LHS (output) vectors, must be allocated by the - * caller beforehand and destroyed after. - * - * @param file names for different components of KKT system - * with same nonzero structure - * - * @post H_, D_s_, J_, J_d_, r_x_, r_s_, r_y_, - * r_yd_ have new values for the system in a following - * solver iteration with same nonzero structure as - * previous iterations - */ - void hykkt::HyKKTSolver::readMatrixFiles( - std::istream& H_file, - std::istream& D_s_file, - std::istream& J_file, - std::istream& J_d_file, - std::istream& r_x_file, - std::istream& rs_file, - std::istream& r_y_file, - std::istream& r_yd_file) - { - io::updateMatrixFromFile(H_file, H_); - io::updateMatrixFromFile(D_s_file, D_s_); - io::updateMatrixFromFile(J_file, J_); - io::updateMatrixFromFile(J_d_file, J_d_); // J_d_tr_ will be populated later - - io::updateVectorFromFile(r_x_file, r_x_); - io::updateVectorFromFile(rs_file, r_s_); - io::updateVectorFromFile(r_y_file, r_y_); - io::updateVectorFromFile(r_yd_file, r_yd_); - - if (memspace_ == memory::DEVICE) - { - H_->syncData(memory::DEVICE); - D_s_->syncData(memory::DEVICE); - J_->syncData(memory::DEVICE); - J_d_->syncData(memory::DEVICE); - r_x_->syncData(memory::DEVICE); - r_s_->syncData(memory::DEVICE); - r_y_->syncData(memory::DEVICE); - r_yd_->syncData(memory::DEVICE); - } - - bool J_d_flag = J_d_->getNnz() > 0; - status_ = (J_d_flag_ == J_d_flag); // if new nonzero structure then broken - J_d_flag_ = J_d_flag; - } - /** * @brief Sets blocks of the KKT matrix in CSR format to user provided * values. It will only set pointers to user provided data; it is user's @@ -293,17 +243,20 @@ namespace ReSolve J_d_scaled_ = new matrix::Csr(J_d_->getNumRows(), J_d_->getNumColumns(), J_d_->getNnz()); D_s_vals_->setData(D_s_->getValues(memspace_), memspace_); + r_yd_scaled_->allocate(memspace_); r_x_perm_->allocate(memspace_); omega_perm_->allocate(memspace_); schur_->allocate(memspace_); r_x_til_->allocate(memspace_); r_x_hat_->allocate(memspace_); z_->allocate(memspace_); - J_tr_->allocateMatrixData(memspace_); + r_y_copy_->allocate(memspace_); + J_tr_->allocateAll(memspace_); J_d_tr_->allocateMatrixData(memspace_); - J_tr_perm_->allocateMatrixData(memory::HOST); - J_perm_->allocateMatrixData(memory::HOST); + J_perm_->allocateAll(memspace_); + J_tr_perm_->allocateAll(memspace_); J_d_scaled_->allocateWithExternalSparsityPattern(J_d_->getRowData(memspace_), J_d_->getColData(memspace_), J_d_->getNnz(), memspace_); + // H_tilde_ does not need to be allocated because loadResultMatrix() does it later } else if (memspace_ == memory::DEVICE) { @@ -360,10 +313,11 @@ namespace ReSolve } else { + H_tilde_->setNnz(H_->getNnz()); + H_tilde_->allocateMatrixData(memspace_); H_tilde_->copyFromExternal(H_->getRowData(memspace_), H_->getColData(memspace_), H_->getValues(memspace_), - H_->getNnz(), memspace_, memspace_); @@ -384,8 +338,10 @@ namespace ReSolve { if (!allocated_) { - J_copy_ = new matrix::Csr(J_->getNumRows(), J_->getNumColumns(), J_->getNnz()); + J_copy_ = new matrix::Csr(J_->getNumRows(), J_->getNumColumns(), J_->getNnz()); + J_copy_->allocateMatrixData(memspace_); J_tr_copy_ = new matrix::Csr(J_tr_->getNumRows(), J_tr_->getNumColumns(), J_tr_->getNnz()); + J_tr_copy_->allocateMatrixData(memspace_); } J_copy_->copyFromExternal(J_->getRowData(memspace_), J_->getColData(memspace_), @@ -434,7 +390,7 @@ namespace ReSolve spgemm_hgamma_ = new SpGEMM(memspace_, gamma_, ONE); spgemm_hgamma_->loadProductMatrices(J_tr_, J_); spgemm_hgamma_->loadSumMatrix(H_tilde_); - spgemm_hgamma_->loadResultMatrix(&H_gamma_); // H_gamma_ will be created by SpGEMM at this step + spgemm_hgamma_->loadResultMatrix(&H_gamma_); // H_gamma_ will be created when calling SpGEMM->compute() } /* @@ -455,16 +411,13 @@ namespace ReSolve void hykkt::HyKKTSolver::setupPermutation() { H_gamma_perm_ = new matrix::Csr(n_x_, n_x_, H_gamma_->getNnz()); // Nnz not known at setupParameters(), so we have to create it here - H_gamma_perm_->allocateMatrixData(memory::HOST); + H_gamma_perm_->allocateAll(memspace_); if (memspace_ == memory::DEVICE) { + H_gamma_->allocateMatrixData(memory::HOST); H_gamma_->syncData(memory::HOST); J_tr_->syncData(memory::HOST); - - H_gamma_perm_->allocateMatrixData(memory::DEVICE); - J_perm_->allocateMatrixData(memory::DEVICE); - J_tr_perm_->allocateMatrixData(memory::DEVICE); } // These permutation steps are device-only @@ -600,6 +553,10 @@ namespace ReSolve // block-recovering the solution to the original system by parts // this part is to recover delta_x cholesky_->solve(z_, r_x_perm_); + if (memspace_ == memory::DEVICE) + { + x_->syncData(memory::DEVICE); + } permutation_->mapIndex(REV_PERM_V, z_->getData(memspace_), x_->getData(memspace_)); x_->setDataUpdated(memspace_); diff --git a/resolve/matrix/Coo.cpp b/resolve/matrix/Coo.cpp index ccdcc8a8..6cc420be 100644 --- a/resolve/matrix/Coo.cpp +++ b/resolve/matrix/Coo.cpp @@ -221,37 +221,18 @@ namespace ReSolve if (memspaceOut == memory::HOST) { - // check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Coo::copyFromExternal one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - this->h_row_data_ = new index_type[nnz_current]; - this->h_col_data_ = new index_type[nnz_current]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) + if ((h_row_data_ == nullptr) || (h_col_data_ == nullptr) || (h_val_data_ == nullptr)) { - this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (host) is not allocated!\n"; + return -1; } } - - if (memspaceOut == memory::DEVICE) + else if (memspaceOut == memory::DEVICE) { - // Check if Cuda data is allocated. - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Coo::copyFromExternal one of device row or column data is null!\n"); - - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) + if ((d_row_data_ == nullptr) || (d_col_data_ == nullptr) || (d_val_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_values_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (device) is not allocated!\n"; + return -1; } } @@ -287,25 +268,17 @@ namespace ReSolve return 0; } - int matrix::Coo::copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut) - { - destroyMatrixData(memspaceOut); - nnz_ = new_nnz; - return copyFromExternal(row_data, col_data, val_data, memspaceIn, memspaceOut); - } - int matrix::Coo::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace); // just in case if (memspace == memory::HOST) { + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Trying to allocate COO matrix host data, but matrix host data has already been allocated!\n"; + return 1; + } this->h_row_data_ = new index_type[nnz_current]; std::fill(h_row_data_, h_row_data_ + nnz_current, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -319,6 +292,11 @@ namespace ReSolve if (memspace == memory::DEVICE) { + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Trying to allocate COO matrix device data, but matrix device data has already been allocated!\n"; + return 1; + } mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -361,17 +339,6 @@ namespace ReSolve << "See Coo::syncData documentation\n."; assert(d_data_updated_); } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - h_row_data_ = new index_type[nnz_]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); diff --git a/resolve/matrix/Coo.hpp b/resolve/matrix/Coo.hpp index 7ec3625b..d494fca4 100644 --- a/resolve/matrix/Coo.hpp +++ b/resolve/matrix/Coo.hpp @@ -37,12 +37,6 @@ namespace ReSolve const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); virtual int allocateMatrixData(memory::MemorySpace memspace); diff --git a/resolve/matrix/Csc.cpp b/resolve/matrix/Csc.cpp index 44a0e3e4..6c0ed663 100644 --- a/resolve/matrix/Csc.cpp +++ b/resolve/matrix/Csc.cpp @@ -110,37 +110,18 @@ namespace ReSolve if (memspaceOut == memory::HOST) { - // check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csc::copyFromExternal one of host row or column data is null!\n"); - - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) - { - this->h_col_data_ = new index_type[m_ + 1]; - this->h_row_data_ = new index_type[nnz_current]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) + if ((h_row_data_ == nullptr) || (h_col_data_ == nullptr) || (h_val_data_ == nullptr)) { - this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (host) is not allocated!\n"; + return -1; } } - - if (memspaceOut == memory::DEVICE) + else if (memspaceOut == memory::DEVICE) { - // check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csc::copyFromExternal one of device row or column data is null!\n"); - - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) + if ((d_row_data_ == nullptr) || (d_col_data_ == nullptr) || (d_val_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_values_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (device) is not allocated!\n"; + return -1; } } @@ -176,25 +157,17 @@ namespace ReSolve return 0; } - int matrix::Csc::copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut) - { - destroyMatrixData(memspaceOut); - nnz_ = new_nnz; - return copyFromExternal(col_data, row_data, val_data, memspaceIn, memspaceOut); - } - int matrix::Csc::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace); // just in case if (memspace == memory::HOST) { + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Trying to allocate CSC matrix host data, but matrix host data has already been allocated!\n"; + return 1; + } this->h_col_data_ = new index_type[m_ + 1]; std::fill(h_col_data_, h_col_data_ + m_ + 1, 0); this->h_row_data_ = new index_type[nnz_current]; @@ -208,6 +181,11 @@ namespace ReSolve if (memspace == memory::DEVICE) { + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Trying to allocate CSC matrix device data, but matrix device data has already been allocated!\n"; + return 1; + } mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); mem_.allocateArrayOnDevice(&d_row_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -250,17 +228,6 @@ namespace ReSolve << "See Csc::syncData documentation\n."; assert(d_data_updated_); } - if ((h_col_data_ == nullptr) && (h_row_data_ == nullptr)) - { - h_col_data_ = new index_type[m_ + 1]; - h_row_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, m_ + 1); mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, nnz_); mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); @@ -281,17 +248,6 @@ namespace ReSolve << "See Csc::syncData documentation\n."; assert(h_data_updated_); } - if ((d_col_data_ == nullptr) && (d_row_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_col_data_, m_ + 1); - mem_.allocateArrayOnDevice(&d_row_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, m_ + 1); mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, nnz_); mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); diff --git a/resolve/matrix/Csc.hpp b/resolve/matrix/Csc.hpp index ce8f41b1..c48fd7f3 100644 --- a/resolve/matrix/Csc.hpp +++ b/resolve/matrix/Csc.hpp @@ -27,12 +27,6 @@ namespace ReSolve const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); virtual int allocateMatrixData(memory::MemorySpace memspace); diff --git a/resolve/matrix/Csr.cpp b/resolve/matrix/Csr.cpp index a851089f..e29d9944 100644 --- a/resolve/matrix/Csr.cpp +++ b/resolve/matrix/Csr.cpp @@ -245,37 +245,18 @@ namespace ReSolve if (memspaceOut == memory::HOST) { - // check if cpu data allocated - assert(((h_row_data_ == nullptr) == (h_col_data_ == nullptr)) && "In Csr::copyFromExternal one of host row or column data is null!\n"); - - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - this->h_row_data_ = new index_type[n_ + 1]; - this->h_col_data_ = new index_type[nnz_current]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) + if ((h_row_data_ == nullptr) || (h_col_data_ == nullptr) || (h_val_data_ == nullptr)) { - this->h_val_data_ = new real_type[nnz_current]; - owns_cpu_values_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (host) is not allocated!\n"; + return -1; } } - - if (memspaceOut == memory::DEVICE) + else if (memspaceOut == memory::DEVICE) { - // check if cuda data allocated - assert(((d_row_data_ == nullptr) == (d_col_data_ == nullptr)) && "In Csr::copyFromExternal one of device row or column data is null!\n"); - - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); - owns_gpu_values_ = true; - } - if (d_val_data_ == nullptr) + if ((d_row_data_ == nullptr) || (d_col_data_ == nullptr) || (d_val_data_ == nullptr)) { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); - owns_gpu_sparsity_pattern_ = true; + out::error() << "Trying to copy from external COO matrix, but destination (device) is not allocated!\n"; + return -1; } } @@ -312,25 +293,17 @@ namespace ReSolve return 0; } - int matrix::Csr::copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut) - { - destroyMatrixData(memspaceOut); - nnz_ = new_nnz; - return copyFromExternal(row_data, col_data, val_data, memspaceIn, memspaceOut); - } - int matrix::Csr::allocateMatrixData(memory::MemorySpace memspace) { index_type nnz_current = nnz_; - destroyMatrixData(memspace); // just in case if (memspace == memory::HOST) { + if (h_row_data_ || h_col_data_ || h_val_data_) + { + out::error() << "Trying to allocate CSR matrix host data, but matrix host data has already been allocated!\n"; + return 1; + } this->h_row_data_ = new index_type[n_ + 1]; std::fill(h_row_data_, h_row_data_ + n_ + 1, 0); this->h_col_data_ = new index_type[nnz_current]; @@ -344,6 +317,11 @@ namespace ReSolve if (memspace == memory::DEVICE) { + if (d_row_data_ || d_col_data_ || d_val_data_) + { + out::error() << "Trying to allocate CSR matrix host data, but matrix host data has already been allocated!\n"; + return 1; + } mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); mem_.allocateArrayOnDevice(&d_col_data_, nnz_current); mem_.allocateArrayOnDevice(&d_val_data_, nnz_current); @@ -387,17 +365,6 @@ namespace ReSolve << "See Csr::syncData documentation\n."; assert(d_data_updated_); } - if ((h_row_data_ == nullptr) && (h_col_data_ == nullptr)) - { - h_row_data_ = new index_type[n_ + 1]; - h_col_data_ = new index_type[nnz_]; - owns_cpu_sparsity_pattern_ = true; - } - if (h_val_data_ == nullptr) - { - h_val_data_ = new real_type[nnz_]; - owns_cpu_values_ = true; - } mem_.copyArrayDeviceToHost(h_row_data_, d_row_data_, n_ + 1); mem_.copyArrayDeviceToHost(h_col_data_, d_col_data_, nnz_); mem_.copyArrayDeviceToHost(h_val_data_, d_val_data_, nnz_); @@ -418,17 +385,6 @@ namespace ReSolve << "See Csr::syncData documentation\n."; assert(h_data_updated_); } - if ((d_row_data_ == nullptr) && (d_col_data_ == nullptr)) - { - mem_.allocateArrayOnDevice(&d_row_data_, n_ + 1); - mem_.allocateArrayOnDevice(&d_col_data_, nnz_); - owns_gpu_sparsity_pattern_ = true; - } - if (d_val_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_val_data_, nnz_); - owns_gpu_values_ = true; - } mem_.copyArrayHostToDevice(d_row_data_, h_row_data_, n_ + 1); mem_.copyArrayHostToDevice(d_col_data_, h_col_data_, nnz_); mem_.copyArrayHostToDevice(d_val_data_, h_val_data_, nnz_); diff --git a/resolve/matrix/Csr.hpp b/resolve/matrix/Csr.hpp index d9b3bb9c..6b7c79bb 100644 --- a/resolve/matrix/Csr.hpp +++ b/resolve/matrix/Csr.hpp @@ -44,12 +44,6 @@ namespace ReSolve const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut); - virtual int copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut); virtual int allocateMatrixData(memory::MemorySpace memspace); diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index c6a9f899..92890dc4 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -233,7 +233,6 @@ namespace ReSolve { index_type error_sum = 0; - A_csr->allocateMatrixData(memory::DEVICE); index_type m = A_csc->getNumColumns(); index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); diff --git a/resolve/matrix/MatrixHandlerHip.cpp b/resolve/matrix/MatrixHandlerHip.cpp index ccbb2693..eabe1481 100644 --- a/resolve/matrix/MatrixHandlerHip.cpp +++ b/resolve/matrix/MatrixHandlerHip.cpp @@ -221,7 +221,6 @@ namespace ReSolve rocsparse_status status; - A_csr->allocateMatrixData(memory::DEVICE); index_type m = A_csc->getNumColumns(); index_type n = A_csc->getNumRows(); index_type nnz = A_csc->getNnz(); diff --git a/resolve/matrix/Sparse.cpp b/resolve/matrix/Sparse.cpp index 1863bf23..8686fab3 100644 --- a/resolve/matrix/Sparse.cpp +++ b/resolve/matrix/Sparse.cpp @@ -244,6 +244,27 @@ namespace ReSolve return 0; } + /** + * @brief If memspace is HOST, allocate on HOST. If it is DEVICE, alloate + * on both HOST and DEVICE. + * + * @param[in] memspace - Memory space of the data to be allocated + * + */ + int matrix::Sparse::allocateAll(memory::MemorySpace memspace) + { + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return this->allocateMatrixData(memory::HOST); + case DEVICE: + return this->allocateMatrixData(memory::HOST) | this->allocateMatrixData(memory::DEVICE); + default: + return -1; + } + } + /** * @brief Allocate the values array and set pointers to external arrays * containing row and column data. diff --git a/resolve/matrix/Sparse.hpp b/resolve/matrix/Sparse.hpp index cb829632..76fadf0e 100644 --- a/resolve/matrix/Sparse.hpp +++ b/resolve/matrix/Sparse.hpp @@ -72,14 +72,9 @@ namespace ReSolve const real_type* val_data, memory::MemorySpace memspaceIn, memory::MemorySpace memspaceOut) = 0; - virtual int copyFromExternal(const index_type* row_data, - const index_type* col_data, - const real_type* val_data, - index_type new_nnz, - memory::MemorySpace memspaceIn, - memory::MemorySpace memspaceOut) = 0; virtual int allocateMatrixData(memory::MemorySpace memspace) = 0; + virtual int allocateAll(memory::MemorySpace memspace); virtual int allocateWithExternalSparsityPattern(index_type* src_row_data, index_type* src_col_data, index_type new_nnz, diff --git a/resolve/vector/Vector.cpp b/resolve/vector/Vector.cpp index 2b115cbe..20d81622 100644 --- a/resolve/vector/Vector.cpp +++ b/resolve/vector/Vector.cpp @@ -95,6 +95,68 @@ namespace ReSolve return k_; } + /** + * @brief Get whether the data of a vector is updated in the given memory space. + * + * @param[in] memspace - Memory space (HOST or DEVICE) + * + * @return Whether data in memspace is updated. + */ + bool Vector::isUpdated(memory::MemorySpace memspace) const + { + switch (memspace) + { + case ReSolve::memory::HOST: + return cpu_updated_[0]; + case ReSolve::memory::DEVICE: + return gpu_updated_[0]; + default: + return false; + } + } + + /** + * @brief Get whether the data of a specific vector in a multivector is updated + * in the given memory space. + * + * @param[in] memspace - Memory space (HOST or DEVICE) + * @param[in] j - Index of vector in multivector to check. + * + * @return Whether data in memspace is updated. + */ + bool Vector::isUpdated(index_type j, memory::MemorySpace memspace) const + { + switch (memspace) + { + case ReSolve::memory::HOST: + return cpu_updated_[j]; + case ReSolve::memory::DEVICE: + return gpu_updated_[j]; + default: + return false; + } + } + + /** + * @brief Get whether the data of a vector is allocated in the given memory space. + * + * @param[in] memspace - Memory space (HOST or DEVICE) + * + * @return Whether data in memspace is allocated. + */ + bool Vector::isAllocated(memory::MemorySpace memspace) const + { + switch (memspace) + { + case ReSolve::memory::HOST: + return h_data_ != nullptr; + case ReSolve::memory::DEVICE: + return d_data_ != nullptr; + default: + return false; + } + } + /** * @brief Set the vector data pointer (HOST or DEVICE) to an external data. * @@ -220,8 +282,6 @@ namespace ReSolve /** * @brief Copy vector data from input array. * - * This function allocates (if necessary) and copies the data. - * * @param[in] data - Data that is to be copied * @param[in] memspaceIn - Memory space of the incoming data (HOST or DEVICE) * @param[in] memspaceOut - Memory space the data will be copied to (HOST or DEVICE) @@ -250,15 +310,13 @@ namespace ReSolve if ((memspaceOut == memory::HOST) && (h_data_ == nullptr)) { - // allocate first - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; + out::error() << "Trying to copy from external vector, but destination (host) is not allocated!\n"; + return -1; } - if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) + else if ((memspaceOut == memory::DEVICE) && (d_data_ == nullptr)) { - // allocate first - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; + out::error() << "Trying to copy from external vector, but destination (device) is not allocated!\n"; + return -1; } switch (control) @@ -296,74 +354,15 @@ namespace ReSolve * * @return pointer to the vector data (HOST or DEVICE). In case of multivectors, * vectors are stored column-wise. - * - * @note This function gives you access to the pointer, not to a copy. - * If you change the values using the pointer, the vector values will - * change too. Make sure to use setDataUpdated function to set the update - * flags correctly after changing the values. */ real_type* Vector::getData(memory::MemorySpace memspace) { - using memory::DEVICE; - using memory::HOST; - - switch (memspace) - { - case HOST: - if ((cpu_updated_[0] == false) && (gpu_updated_[0] == true)) - { - syncData(memspace); - } - return h_data_; - case DEVICE: - if ((gpu_updated_[0] == false) && (cpu_updated_[0] == true)) - { - syncData(memspace); - } - return d_data_; - default: - return nullptr; - } - } - - /** - * @brief get a pointer to HOST or DEVICE vector data. - * - * @param[in] memspace - Memory space of the pointer (HOST or DEVICE) - * - * @return pointer to the vector data (HOST or DEVICE). In case of multivectors, - * vectors are stored column-wise. - */ - const real_type* Vector::getData(memory::MemorySpace memspace) const - { - using memory::DEVICE; - using memory::HOST; - - switch (memspace) - { - case HOST: - if (cpu_updated_[0] == false) - { - out::error() << "Trying to get data on the host, but host data is out of date!\n" - << "Use syncData function to sync host data with the device data!\n"; - return nullptr; - } - return h_data_; - case DEVICE: - if (gpu_updated_[0] == false) - { - out::error() << "Trying to get data on the device, but device data is out of date!\n" - << "Use syncData function to sync device data with the host data!\n"; - return nullptr; - } - return d_data_; - default: - return nullptr; - } + return getData(0, memspace); } /** - * @brief get a pointer to HOST or DEVICE data of a particular vector in a multivector. + * @brief get a pointer to HOST or DEVICE data of a particular + * vector in a multivector. * * @param[in] j - Index of a vector in multivector * @param[in] memspace - Memory space of the pointer (HOST or DEVICE) @@ -372,10 +371,6 @@ namespace ReSolve * * @pre `j` < `k_` i.e, `j` is smaller than the total number of vectors in multivector. * - * @note This function gives you access to the pointer, not to a copy. - * If you change the values using the pointer, the vector values will - * change too. Make sure to use setDataUpdated function to set the update - * flags correctly after changing the values. */ real_type* Vector::getData(index_type j, memory::MemorySpace memspace) { @@ -392,22 +387,26 @@ namespace ReSolve switch (memspace) { case HOST: - if ((cpu_updated_[j] == false) && (gpu_updated_[j] == true)) - { - syncData(j, memspace); - } return &h_data_[j * n_size_]; case DEVICE: - if ((gpu_updated_[j] == false) && (cpu_updated_[j] == true)) - { - syncData(j, memspace); - } return &d_data_[j * n_size_]; default: return nullptr; } } + /** + * @brief get a const pointer to HOST or DEVICE vector data. + * + * @param[in] memspace - Memory space of the pointer (HOST or DEVICE) + * + * @return pointer to the vector data (HOST or DEVICE). + */ + const real_type* Vector::getData(memory::MemorySpace memspace) const + { + return getData(0, memspace); + } + /** * @brief get a const pointer to HOST or DEVICE data of a particular * vector in a multivector. @@ -629,35 +628,72 @@ namespace ReSolve switch (memspace) { case HOST: - delete[] h_data_; - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; - if (gpu_updated_[0]) + if (h_data_) { - cpu_updated_[0] = false; + out::error() << "Trying to allocate vector host data, but vector host data has already been allocated!\n"; + return 1; } - else + h_data_ = new real_type[n_capacity_ * k_]; + owns_cpu_data_ = true; + // Set updated flags for each vector in multivector + for (index_type j = 0; j < k_; j++) { - cpu_updated_[0] = true; + if (gpu_updated_[j]) + { + cpu_updated_[j] = false; + } + else + { + cpu_updated_[j] = true; + } } break; case DEVICE: - mem_.deleteOnDevice(d_data_); - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; - if (cpu_updated_[0]) + if (d_data_) { - gpu_updated_[0] = false; + out::error() << "Trying to allocate vector device data, but vector device data has already been allocated!\n"; + return 1; } - else + mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); + owns_gpu_data_ = true; + // Set updated flags for each vector in multivector + for (index_type j = 0; j < k_; j++) { - gpu_updated_[0] = true; + if (cpu_updated_[j]) + { + gpu_updated_[j] = false; + } + else + { + gpu_updated_[j] = true; + } } break; } return 0; } + /** + * @brief If memspace is HOST, allocate on HOST. If it is DEVICE, alloate + * on both HOST and DEVICE. + * + * @param[in] memspace - Memory space of the data to be allocated + * + */ + int Vector::allocateAll(memory::MemorySpace memspace) + { + using namespace ReSolve::memory; + switch (memspace) + { + case HOST: + return allocate(memory::HOST); + case DEVICE: + return allocate(memory::HOST) | allocate(memory::DEVICE); + default: + return -1; + } + } + /** * @brief set vector data to zero. In case of multivectors, entire multivector is set to zero. * @@ -670,21 +706,11 @@ namespace ReSolve switch (memspace) { case HOST: - if (h_data_ == nullptr) - { - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; - } mem_.setZeroArrayOnHost(h_data_, n_capacity_ * k_); setHostUpdated(true); setDeviceUpdated(false); break; case DEVICE: - if (d_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; - } mem_.setZeroArrayOnDevice(d_data_, n_capacity_ * k_); setHostUpdated(false); setDeviceUpdated(true); @@ -707,21 +733,11 @@ namespace ReSolve switch (memspace) { case HOST: - if (h_data_ == nullptr) - { - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; - } mem_.setZeroArrayOnHost(&h_data_[j * n_size_], n_size_); cpu_updated_[j] = true; gpu_updated_[j] = false; break; case DEVICE: - if (d_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; - } // TODO: We should not need to access raw data in this class mem_.setZeroArrayOnDevice(&d_data_[j * n_size_], n_size_); cpu_updated_[j] = false; @@ -746,21 +762,11 @@ namespace ReSolve switch (memspace) { case HOST: - if (h_data_ == nullptr) - { - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; - } mem_.setArrayToConstOnHost(h_data_, C, n_size_ * k_); setHostUpdated(true); setDeviceUpdated(false); break; case DEVICE: - if (d_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; - } mem_.setArrayToConstOnDevice(d_data_, C, n_size_ * k_); setHostUpdated(false); setDeviceUpdated(true); @@ -784,21 +790,11 @@ namespace ReSolve switch (memspace) { case HOST: - if (h_data_ == nullptr) - { - h_data_ = new real_type[n_capacity_ * k_]; - owns_cpu_data_ = true; - } mem_.setArrayToConstOnHost(&h_data_[n_size_ * j], C, n_size_); cpu_updated_[j] = true; gpu_updated_[j] = false; break; case DEVICE: - if (d_data_ == nullptr) - { - mem_.allocateArrayOnDevice(&d_data_, n_capacity_ * k_); - owns_gpu_data_ = true; - } mem_.setArrayToConstOnDevice(&d_data_[n_size_ * j], C, n_size_); cpu_updated_[j] = false; gpu_updated_[j] = true; diff --git a/resolve/vector/Vector.hpp b/resolve/vector/Vector.hpp index 451c9177..c539e81d 100644 --- a/resolve/vector/Vector.hpp +++ b/resolve/vector/Vector.hpp @@ -45,11 +45,15 @@ namespace ReSolve index_type getCapacity() const; index_type getSize() const; index_type getNumVectors() const; + bool isUpdated(memory::MemorySpace memspace) const; + bool isUpdated(index_type j, memory::MemorySpace memspace) const; + bool isAllocated(memory::MemorySpace memspace) const; int setDataUpdated(memory::MemorySpace memspace); int setDataUpdated(index_type j, memory::MemorySpace memspace); int setData(real_type* data, memory::MemorySpace memspace); int allocate(memory::MemorySpace memspace); + int allocateAll(memory::MemorySpace memspace); int setToZero(memory::MemorySpace memspace); int setToZero(index_type i, memory::MemorySpace memspace); int setToConst(real_type C, memory::MemorySpace memspace); diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 3c2dc041..b626c031 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -419,9 +419,9 @@ namespace ReSolve { assert(x->getSize() == y->getSize() && "Vectors must be the same size."); assert(x->getSize() == out->getSize() && "Vectors must be the same size."); - assert(x->getData(memspace) != nullptr && "Vector x data is null!"); - assert(y->getData(memspace) != nullptr && "Vector y data is null!"); - assert(out->getData(memspace) != nullptr && "Vector out data is null!"); + assert(x->isAllocated(memspace) && "Vector x data is null!"); + assert(y->isAllocated(memspace) && "Vector y data is null!"); + assert(out->isAllocated(memspace) && "Vector out data is null!"); using namespace ReSolve::memory; switch (memspace) { diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index ff1819f1..9d6098cc 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -108,14 +108,13 @@ namespace ReSolve workspace_->setNormBuffer(buffer); workspace_->setNormBufferState(true); } - real_type norm{0.0}; - // TODO: Shouldn't the return type be cusolverStatus_t ? - int status = cusolverSpDnrminf(workspace_->getCusolverSpHandle(), - x->getSize(), - x->getData(memory::DEVICE), - &norm, - workspace_->getNormBuffer() /* at least 8192 bytes */); - if (status) + real_type norm{0.0}; + cusolverStatus_t status = cusolverSpDnrminf(workspace_->getCusolverSpHandle(), + x->getSize(), + x->getData(memory::DEVICE), + &norm, + workspace_->getNormBuffer() /* at least 8192 bytes */); + if (status != CUSOLVER_STATUS_SUCCESS) { out::error() << "cusolverSpDnrminf failed with error code " << status << "\n"; } diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp index 1c6facee..98bb934a 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -1,7 +1,12 @@ +#include + +#include #include namespace ReSolve { + using out = io::Logger; + LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() { handle_cusolversp_ = nullptr; @@ -88,15 +93,16 @@ namespace ReSolve return transpose_workspace_; } - void LinAlgWorkspaceCUDA::setTransposeBufferWorkspace(size_t bufferSize) + int LinAlgWorkspaceCUDA::setTransposeBufferWorkspace(size_t bufferSize) { if (transpose_workspace_ready_) { - mem_.deleteOnDevice(transpose_workspace_); + out::error() << "Transpose workspace already set!\n"; + return 1; } mem_.allocateBufferOnDevice(&transpose_workspace_, bufferSize); transpose_workspace_ready_ = true; - return; + return 0; } bool LinAlgWorkspaceCUDA::isTransposeBufferAllocated() diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index 9c7b555b..512f173b 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -20,7 +20,7 @@ namespace ReSolve void* getSpmvBuffer(); void* getNormBuffer(); void* getTransposeBufferWorkspace(); - void setTransposeBufferWorkspace(size_t bufferSize); + int setTransposeBufferWorkspace(size_t bufferSize); bool isTransposeBufferAllocated(); void setSpmvBuffer(void* buffer); void setNormBuffer(void* buffer); diff --git a/resolve/workspace/LinAlgWorkspaceHIP.cpp b/resolve/workspace/LinAlgWorkspaceHIP.cpp index 239dcb30..4ef86137 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.cpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.cpp @@ -1,7 +1,12 @@ +#include + +#include #include namespace ReSolve { + using out = io::Logger; + LinAlgWorkspaceHIP::LinAlgWorkspaceHIP() { handle_rocsparse_ = nullptr; @@ -186,14 +191,16 @@ namespace ReSolve return transpose_workspace_; } - void LinAlgWorkspaceHIP::setTransposeBufferWorkspace(size_t bufferSize) + int LinAlgWorkspaceHIP::setTransposeBufferWorkspace(size_t bufferSize) { if (transpose_workspace_ready_) { - mem_.deleteOnDevice(transpose_workspace_); + out::error() << "Transpose workspace already set!\n"; + return 1; } mem_.allocateBufferOnDevice(&transpose_workspace_, bufferSize); transpose_workspace_ready_ = true; + return 0; } bool LinAlgWorkspaceHIP::isTransposeBufferAllocated() diff --git a/resolve/workspace/LinAlgWorkspaceHIP.hpp b/resolve/workspace/LinAlgWorkspaceHIP.hpp index 91171257..db081520 100644 --- a/resolve/workspace/LinAlgWorkspaceHIP.hpp +++ b/resolve/workspace/LinAlgWorkspaceHIP.hpp @@ -25,7 +25,7 @@ namespace ReSolve real_type* getDr(); real_type* getNormBuffer(); void* getTransposeBufferWorkspace(); - void setTransposeBufferWorkspace(size_t bufferSize); + int setTransposeBufferWorkspace(size_t bufferSize); bool getNormBufferState(); bool isTransposeBufferAllocated(); diff --git a/tests/functionality/TestHelper.hpp b/tests/functionality/TestHelper.hpp index 0b7bf6e9..81aa9300 100644 --- a/tests/functionality/TestHelper.hpp +++ b/tests/functionality/TestHelper.hpp @@ -134,7 +134,8 @@ class TestHelper /** * @brief Set the new linear system together with its computed solution - * and compute solution error and residual norms. + * and compute solution error and residual norms. All input matrix and + * vector data arrays must be updated on the current memory space. * * This will set the new system A*x = r and compute related error norms. * @@ -147,10 +148,11 @@ class TestHelper ReSolve::vector::Vector* x) { assert((res_ == nullptr) && (x_true_ == nullptr)); - A_ = A; - r_ = r; - x_ = x; - res_ = new ReSolve::vector::Vector(A->getNumRows()); + A_ = A; + r_ = r; + x_ = x; + res_ = new ReSolve::vector::Vector(A->getNumRows()); + res_->allocateAll(memspace_); x_true_ = new ReSolve::vector::Vector(A->getNumRows()); setSolutionVector(); computeNorms(); @@ -160,7 +162,8 @@ class TestHelper * @brief Set the new linear system together with its computed solution * and compute solution error and residual norms. * - * This is to be used after values in A and r are updated. + * This is to be used after values in A and r are updated and synced on + * the host. * * @todo This method probably does not need any input parameters. * diff --git a/tests/functionality/testKlu.cpp b/tests/functionality/testKlu.cpp index 4df39afd..9c19e0bb 100644 --- a/tests/functionality/testKlu.cpp +++ b/tests/functionality/testKlu.cpp @@ -122,6 +122,7 @@ int runTest(int argc, char* argv[], std::string& solver_name) } real_type* rhs = ReSolve::io::createArrayFromFile(rhs1_file); vector_type vec_rhs(A->getNumRows()); + vec_rhs.allocate(ReSolve::memory::HOST); vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); rhs1_file.close(); diff --git a/tests/functionality/testLUSOL.cpp b/tests/functionality/testLUSOL.cpp index 3ab65980..280546bb 100644 --- a/tests/functionality/testLUSOL.cpp +++ b/tests/functionality/testLUSOL.cpp @@ -77,6 +77,7 @@ int main(int argc, char* argv[]) vector_type vec_x(A->getNumRows()); vector_type vec_r(A->getNumRows()); + vec_rhs.allocate(ReSolve::memory::HOST); vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs.setDataUpdated(ReSolve::memory::HOST); vec_x.allocate(ReSolve::memory::HOST); @@ -106,12 +107,16 @@ int main(int argc, char* argv[]) real_type* x_data = new real_type[A->getNumRows()]; std::fill_n(x_data, A->getNumRows(), 1.0); + vec_test.allocate(ReSolve::memory::HOST); vec_test.copyFromExternal(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_r.allocate(ReSolve::memory::HOST); vec_r.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_diff.allocate(ReSolve::memory::HOST); vec_diff.copyFromExternal(x_data, ReSolve::memory::HOST, ReSolve::memory::HOST); // Matrix-vector product does not support COO format so we need to convert to CSR ReSolve::matrix::Csr A_csr(A->getNumRows(), A->getNumColumns(), A->getNnz(), A->symmetric(), A->expanded()); + A_csr.allocateMatrixData(ReSolve::memory::HOST); error_sum += coo2csr(A.get(), &A_csr, ReSolve::memory::HOST); // Tell matrix handler this is a new matrix @@ -186,7 +191,6 @@ int main(int argc, char* argv[]) vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs.setDataUpdated(ReSolve::memory::HOST); - vec_x.allocate(ReSolve::memory::HOST); error_sum += lusol.setup(A.get()); error_sum += lusol.analyze(); diff --git a/tests/functionality/testRandGmres.cpp b/tests/functionality/testRandGmres.cpp index e667bfa6..be9d9d45 100644 --- a/tests/functionality/testRandGmres.cpp +++ b/tests/functionality/testRandGmres.cpp @@ -143,6 +143,10 @@ int runTest(int argc, char* argv[]) error_sum += status; // Compute error norms for the system + if (memspace == ReSolve::memory::DEVICE) + { + vec_x.syncData(ReSolve::memory::HOST); + } helper.setSystem(A, vec_rhs, &vec_x); // Print result summary and check solution @@ -246,6 +250,10 @@ ReSolve::matrix::Csr* generateMatrix(const index_type n, ReSolve::memory::Memory // Allocate CSR matrix structure ReSolve::matrix::Csr* matrix = new ReSolve::matrix::Csr(n, n, static_cast(total_nonzeros)); matrix->allocateMatrixData(ReSolve::memory::HOST); + if (memory_space == ReSolve::memory::DEVICE) + { + matrix->allocateMatrixData(ReSolve::memory::DEVICE); + } // Get pointers to CSR data structures index_type* row_offsets = matrix->getRowData(ReSolve::memory::HOST); diff --git a/tests/functionality/testRefactor.cpp b/tests/functionality/testRefactor.cpp index 7d72a401..1d594044 100644 --- a/tests/functionality/testRefactor.cpp +++ b/tests/functionality/testRefactor.cpp @@ -135,6 +135,7 @@ int runTest(int argc, char* argv[], std::string& solver_name) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); mat1.close(); @@ -147,7 +148,9 @@ int runTest(int argc, char* argv[], std::string& solver_name) } real_type* rhs = ReSolve::io::createArrayFromFile(rhs1_file); vector_type vec_rhs(A->getNumRows()); + vec_rhs.allocate(ReSolve::memory::HOST); vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs.allocate(ReSolve::memory::DEVICE); vec_rhs.syncData(ReSolve::memory::DEVICE); rhs1_file.close(); @@ -171,15 +174,18 @@ int runTest(int argc, char* argv[], std::string& solver_name) matrix_type* U = KLU.getUFactor(); index_type* P = KLU.getPOrdering(); index_type* Q = KLU.getQOrdering(); + L->allocateMatrixData(ReSolve::memory::DEVICE); + U->allocateMatrixData(ReSolve::memory::DEVICE); status = Rf.setup(A, L, U, P, Q, &vec_rhs); error_sum += status; - // Refactorize (on device where available) + // Refactorize status = Rf.refactorize(); error_sum += status; - // Solve system (on device where available) + // Solve system + vec_x.syncData(ReSolve::memory::DEVICE); status = Rf.solve(&vec_rhs, &vec_x); error_sum += status; @@ -200,6 +206,7 @@ int runTest(int argc, char* argv[], std::string& solver_name) } // Compute error norms for the system + vec_x.syncData(ReSolve::memory::HOST); helper.setSystem(A, &vec_rhs, &vec_x); // Print result summary and check solution @@ -231,6 +238,7 @@ int runTest(int argc, char* argv[], std::string& solver_name) } ReSolve::io::updateArrayFromFile(rhs2_file, &rhs); rhs2_file.close(); + vec_rhs.allocate(ReSolve::memory::DEVICE); vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); // Refactorize second matrix @@ -255,6 +263,11 @@ int runTest(int argc, char* argv[], std::string& solver_name) } // Recompute error norms for the second system and print summary + vec_rhs.syncData(ReSolve::memory::HOST); + if (!vec_x.isUpdated(ReSolve::memory::HOST)) + { + vec_x.syncData(ReSolve::memory::HOST); + } helper.resetSystem(A, &vec_rhs, &vec_x); std::cout << "\nResults (second matrix): \n\n"; diff --git a/tests/functionality/testSysGLU.cpp b/tests/functionality/testSysGLU.cpp index 1fbb9aa6..5e877e60 100644 --- a/tests/functionality/testSysGLU.cpp +++ b/tests/functionality/testSysGLU.cpp @@ -89,6 +89,7 @@ int main(int argc, char* argv[]) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1); + A->allocateMatrixData(ReSolve::memory::DEVICE); A->syncData(ReSolve::memory::DEVICE); mat1.close(); @@ -106,10 +107,8 @@ int main(int argc, char* argv[]) vector_type* vec_r = new vector_type(A->getNumRows()); rhs1_file.close(); - vec_x->allocate(ReSolve::memory::HOST); // for KLU - vec_x->allocate(ReSolve::memory::DEVICE); - // Set RHS vector on CPU + vec_rhs->allocate(ReSolve::memory::HOST); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); vec_rhs->setDataUpdated(ReSolve::memory::HOST); @@ -130,12 +129,15 @@ int main(int argc, char* argv[]) std::cout << "GLU setup status: " << status << std::endl; // Move rhs vector data to GPU + vec_rhs->allocate(ReSolve::memory::DEVICE); vec_rhs->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_x->allocate(ReSolve::memory::DEVICE); status = solver.solve(vec_rhs, vec_x); error_sum += status; std::cout << "GLU solve status: " << status << std::endl; // Compute residual on device + vec_r->allocate(ReSolve::memory::DEVICE); vec_r->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); matrix_handler.setValuesChanged(true, ReSolve::memory::DEVICE); status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::DEVICE); @@ -180,10 +182,13 @@ int main(int argc, char* argv[]) { x_data_ref[i] = 1.0; } + vec_test->allocate(ReSolve::memory::HOST); vec_test->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::HOST); - vec_test->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + vec_test->allocate(ReSolve::memory::DEVICE); + vec_test->syncData(ReSolve::memory::DEVICE); // compute ||x_diff|| = ||x - x_true|| norm + vec_diff->allocate(ReSolve::memory::DEVICE); vec_diff->copyFromExternal(x_data_ref, ReSolve::memory::HOST, ReSolve::memory::DEVICE); vector_handler.axpy(MINUS_ONE, vec_x, vec_diff, ReSolve::memory::DEVICE); real_type normDiffMatrix1 = sqrt(vector_handler.dot(vec_diff, vec_diff, ReSolve::memory::DEVICE)); @@ -195,13 +200,16 @@ int main(int argc, char* argv[]) real_type exactSol_normRmatrix1 = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::DEVICE)); // Compute residual norm ON THE CPU using COMPUTED solution + vec_x->allocate(ReSolve::memory::HOST); vec_x->copyFromExternal(vec_x->getData(ReSolve::memory::DEVICE), ReSolve::memory::DEVICE, ReSolve::memory::HOST); + vec_r->allocate(ReSolve::memory::HOST); vec_r->copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); status = matrix_handler.matvec(A, vec_x, vec_r, &ONE, &MINUS_ONE, ReSolve::memory::HOST); error_sum += status; real_type normRmatrix1CPU = sqrt(vector_handler.dot(vec_r, vec_r, ReSolve::memory::HOST)); // Verify relative residual norm computation in SystemSolver + vec_x->syncData(ReSolve::memory::DEVICE); real_type rel_residual_norm = solver.getResidualNorm(vec_rhs, vec_x); error = std::abs(normB1 * rel_residual_norm - normRmatrix1) / normRmatrix1; if (error > 10.0 * std::numeric_limits::epsilon()) diff --git a/tests/functionality/testSysGmres.cpp b/tests/functionality/testSysGmres.cpp index 7311f310..c7790bd8 100644 --- a/tests/functionality/testSysGmres.cpp +++ b/tests/functionality/testSysGmres.cpp @@ -142,10 +142,9 @@ int test(int argc, char* argv[]) // Create solution vector vector_type vec_x(A->getNumRows()); - vec_x.allocate(ReSolve::memory::HOST); // Set the initial guess to 0 - vec_x.allocate(memspace); + vec_x.allocateAll(memspace); vec_x.setToZero(memspace); // Set solver options @@ -186,8 +185,7 @@ int test(int argc, char* argv[]) // Create a large initial guess with a larger residual than the zero vector. vector_type bad_guess_x(A->getNumRows()); - bad_guess_x.allocate(ReSolve::memory::HOST); - bad_guess_x.allocate(memspace); + bad_guess_x.allocateAll(memspace); bad_guess_x.setToConst(1.0e6, memspace); ReSolve::SystemSolver bad_guess_solver(&workspace, "none", "none", method, "ilu0", "none"); @@ -238,6 +236,7 @@ int test(int argc, char* argv[]) // Use a scaled converged solution as a nonzero initial guess. vector_type vec_x_guess(vec_x.getSize()); + vec_x_guess.allocate(memspace); vec_x_guess.copyFromExternal(&vec_x, memspace, memspace); vector_handler.scal(0.9, &vec_x_guess, memspace); @@ -282,6 +281,10 @@ int test(int argc, char* argv[]) } // Check results and print summary + if (memspace == ReSolve::memory::DEVICE) + { + vec_x.syncData(ReSolve::memory::HOST); + } helper.setSystem(A, vec_rhs, &vec_x); std::cout << std::defaultfloat @@ -403,8 +406,7 @@ std::string headerInfo(const std::string& method, ReSolve::vector::Vector* generateRhs(const index_type N, ReSolve::memory::MemorySpace memspace) { vector_type* vec_rhs = new vector_type(N); - vec_rhs->allocate(ReSolve::memory::HOST); - vec_rhs->allocate(memspace); + vec_rhs->allocateAll(memspace); real_type* data = vec_rhs->getData(ReSolve::memory::HOST); for (int i = 0; i < N; ++i) @@ -447,7 +449,7 @@ ReSolve::matrix::Csr* generateMatrix(const index_type N, ReSolve::memory::Memory // Allocate NxN CSR matrix with NNZ nonzeros ReSolve::matrix::Csr* A = new ReSolve::matrix::Csr(N, N, NNZ); - A->allocateMatrixData(ReSolve::memory::HOST); + A->allocateAll(memspace); index_type* rowptr = A->getRowData(ReSolve::memory::HOST); index_type* colidx = A->getColData(ReSolve::memory::HOST); diff --git a/tests/functionality/testSysRefactor.cpp b/tests/functionality/testSysRefactor.cpp index dd771afa..86d62905 100644 --- a/tests/functionality/testSysRefactor.cpp +++ b/tests/functionality/testSysRefactor.cpp @@ -157,9 +157,10 @@ static int runTest(int argc, char* argv[], std::string backend) return -1; } ReSolve::matrix::Csr* A = ReSolve::io::createCsrFromFile(mat1, true); - if (memspace != memory::HOST) + if (memspace == memory::DEVICE) { - A->syncData(memspace); + A->allocateMatrixData(memory::DEVICE); + A->syncData(memory::DEVICE); } mat1.close(); @@ -175,15 +176,12 @@ static int runTest(int argc, char* argv[], std::string backend) // Create and set residual vector vector_type vec_rhs = (A->getNumRows()); + vec_rhs.allocate(ReSolve::memory::HOST); vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); // Create and allocate solution vector vector_type vec_x(A->getNumRows()); - if (memspace != memory::HOST) - { - vec_x.allocate(ReSolve::memory::HOST); // for KLU - } - vec_x.allocate(memspace); + vec_x.allocateAll(memspace); // Add system matrix to the solver status = solver.setMatrix(A); @@ -200,6 +198,11 @@ static int runTest(int argc, char* argv[], std::string backend) error_sum += status; // Compute error norms for the system + if (memspace == ReSolve::memory::DEVICE) + { + vec_x.syncData(ReSolve::memory::DEVICE); + vec_rhs.syncData(ReSolve::memory::DEVICE); + } helper.setSystem(A, &vec_rhs, &vec_x); // Print result summary and check solution @@ -243,6 +246,10 @@ static int runTest(int argc, char* argv[], std::string backend) ReSolve::io::updateArrayFromFile(rhs2_file, &rhs); rhs2_file.close(); + if (memspace == ReSolve::memory::DEVICE) + { + vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, ReSolve::memory::DEVICE); + } vec_rhs.copyFromExternal(rhs, ReSolve::memory::HOST, memspace); // Refactorize matrix @@ -254,6 +261,14 @@ static int runTest(int argc, char* argv[], std::string backend) error_sum += status; // Compute error norms for the system + if (memspace == ReSolve::memory::DEVICE) + { + if (!vec_rhs.isUpdated(ReSolve::memory::HOST)) + { + vec_rhs.syncData(ReSolve::memory::HOST); + } + vec_x.syncData(ReSolve::memory::HOST); + } helper.resetSystem(A, &vec_rhs, &vec_x); // Print result summary and check solution diff --git a/tests/unit/hykkt/HykktCholeskyTests.hpp b/tests/unit/hykkt/HykktCholeskyTests.hpp index cf1a9b8a..e04d39ae 100644 --- a/tests/unit/hykkt/HykktCholeskyTests.hpp +++ b/tests/unit/hykkt/HykktCholeskyTests.hpp @@ -53,6 +53,7 @@ namespace ReSolve index_type A_row_data[4] = {0, 3, 6, 9}; index_type A_col_data[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; real_type A_values[9] = {4.0, 12.0, -16.0, 12.0, 37.0, -43.0, -16.0, -43.0, 98.0}; + A->allocateAll(memspace_); A->copyFromExternal(A_row_data, A_col_data, A_values, memory::HOST, memory::HOST); if (memspace_ == memory::DEVICE) { @@ -65,9 +66,11 @@ namespace ReSolve solver.setPivotTolerance(1e-12); solver.numericalFactorization(); vector::Vector* x = new vector::Vector(3); - x->allocate(memspace_); + x->allocateAll(memspace_); + x->setDataUpdated(memory::DEVICE); vector::Vector* b = new vector::Vector(3); real_type b_data[3] = {-6.0, -17.25, 30.0}; + b->allocate(memspace_); b->copyFromExternal(b_data, memory::HOST, memspace_); solver.solve(x, b); @@ -110,6 +113,7 @@ namespace ReSolve matrix::Csr* A = new matrix::Csr((index_type) L_times_L_tr->nrow, (index_type) L_times_L_tr->ncol, (index_type) L_times_L_tr->nzmax); + A->allocateAll(memspace_); A->copyFromExternal( static_cast(L_times_L_tr->p), static_cast(L_times_L_tr->i), @@ -185,6 +189,7 @@ namespace ReSolve cholmod_sparse* A_chol = cholmod_ssmult(L, L_tr, 0, 1, 0, &Common); matrix::Csr* A = new matrix::Csr((index_type) A_chol->nrow, (index_type) A_chol->ncol, (index_type) A_chol->nzmax); + A->allocateAll(memspace_); A->copyFromExternal( static_cast(A_chol->p), static_cast(A_chol->i), static_cast(A_chol->x), memory::HOST, memspace_); @@ -314,7 +319,7 @@ namespace ReSolve vector::Vector* randomVector(index_type n) { vector::Vector* v = new vector::Vector(n); - v->allocate(memory::HOST); + v->allocateAll(memspace_); std::uniform_real_distribution distribution(0.0, 1.0); for (index_type i = 0; i < n; ++i) { diff --git a/tests/unit/hykkt/HykktPermutationTests.hpp b/tests/unit/hykkt/HykktPermutationTests.hpp index 6ec9614f..9efc1f3d 100644 --- a/tests/unit/hykkt/HykktPermutationTests.hpp +++ b/tests/unit/hykkt/HykktPermutationTests.hpp @@ -49,6 +49,9 @@ namespace ReSolve matrix::Csr hes(n, n, nnz_hes); matrix::Csr jac(m, n, nnz_jac); matrix::Csr jac_tr(n, m, nnz_jac); + hes.allocateAll(memspace_); + jac.allocateAll(memspace_); + jac_tr.allocateAll(memspace_); getTestData(&hes, &jac, &jac_tr); // correct results diff --git a/tests/unit/hykkt/HykktRuizScalingTests.hpp b/tests/unit/hykkt/HykktRuizScalingTests.hpp index b89e831c..74c1d415 100644 --- a/tests/unit/hykkt/HykktRuizScalingTests.hpp +++ b/tests/unit/hykkt/HykktRuizScalingTests.hpp @@ -54,14 +54,18 @@ namespace ReSolve { matrix::Csr* H = new matrix::Csr(n, n, 3 * n - 2); matrix::Csr* J = new matrix::Csr(n - 1, n, 2 * n - 2); + H->allocateAll(memspace_); + J->allocateAll(memspace_); vector::Vector* rhs_top = new vector::Vector(n); vector::Vector* rhs_bottom = new vector::Vector(n - 1); + rhs_top->allocateAll(memspace_); + rhs_bottom->allocateAll(memspace_); generateMatrixData(H, J, rhs_top, rhs_bottom, n); // Transpose J and store in J_tr matrix::Csr* J_tr = new matrix::Csr(n, n - 1, 2 * n - 2); - J_tr->allocateMatrixData(memspace_); + J_tr->allocateAll(memspace_); matrixHandler_.transpose(J, J_tr, memspace_); // Perform scaling diff --git a/tests/unit/hykkt/HykktSCCGTests.hpp b/tests/unit/hykkt/HykktSCCGTests.hpp index 97074b22..4a3fe533 100644 --- a/tests/unit/hykkt/HykktSCCGTests.hpp +++ b/tests/unit/hykkt/HykktSCCGTests.hpp @@ -63,6 +63,8 @@ namespace ReSolve matrix::Csr* J = io::createCsrFromFile(J_file, false); if (memspace_ == memory::DEVICE) { + H->allocateMatrixData(memory::DEVICE); + J->allocateMatrixData(memory::DEVICE); H->syncData(memory::DEVICE); J->syncData(memory::DEVICE); } @@ -79,15 +81,16 @@ namespace ReSolve sccg.setSolverTolerance(sccg_tol); matrix::Csr* J_tr = new matrix::Csr(m, n, nnz); - J_tr->allocateMatrixData(memspace_); + J_tr->allocateAll(memspace_); matrix_handler_.transpose(J, J_tr, memspace_); vector::Vector* x_0 = new vector::Vector(n); - x_0->allocate(memspace_); + x_0->allocateAll(memspace_); vector::Vector* b = io::createVectorFromFile(b_file); if (memspace_ == memory::DEVICE) { + b->allocate(memory::DEVICE); b->syncData(memory::DEVICE); } diff --git a/tests/unit/hykkt/HykktSolverTests.hpp b/tests/unit/hykkt/HykktSolverTests.hpp index ed2b4f4d..9d2a9d94 100644 --- a/tests/unit/hykkt/HykktSolverTests.hpp +++ b/tests/unit/hykkt/HykktSolverTests.hpp @@ -69,7 +69,6 @@ namespace ReSolve const std::string& r_y_file_name, const std::string& r_yd_file_name, real_type gamma) - { constexpr double tol = 1e-2; @@ -90,6 +89,10 @@ namespace ReSolve matrix::Csr* J_d = io::createCsrFromFile(J_d_file, false); if (memspace_ == memory::DEVICE) { + H->allocateMatrixData(memory::DEVICE); + D_s->allocateMatrixData(memory::DEVICE); + J->allocateMatrixData(memory::DEVICE); + J_d->allocateMatrixData(memory::DEVICE); H->syncData(memory::DEVICE); D_s->syncData(memory::DEVICE); J->syncData(memory::DEVICE); @@ -103,6 +106,10 @@ namespace ReSolve vector::Vector* r_yd = io::createVectorFromFile(r_yd_file); if (memspace_ == memory::DEVICE) { + r_x->allocate(memory::DEVICE); + r_s->allocate(memory::DEVICE); + r_y->allocate(memory::DEVICE); + r_yd->allocate(memory::DEVICE); r_x->syncData(memory::DEVICE); r_s->syncData(memory::DEVICE); r_y->syncData(memory::DEVICE); @@ -114,10 +121,10 @@ namespace ReSolve vector::Vector* s = new vector::Vector(m_d); vector::Vector* y = new vector::Vector(m_c); vector::Vector* y_d = new vector::Vector(m_d); - x->allocate(memspace_); - s->allocate(memspace_); - y->allocate(memspace_); - y_d->allocate(memspace_); + x->allocateAll(memspace_); + s->allocateAll(memspace_); + y->allocateAll(memspace_); + y_d->allocateAll(memspace_); hykkt::HyKKTSolver hykktSolver(n_x, m_d, m_c, memspace_); hykktSolver.setMatrixBlocks(H, D_s, J, J_d); diff --git a/tests/unit/hykkt/HykktSpGEMMTests.hpp b/tests/unit/hykkt/HykktSpGEMMTests.hpp index b34ab928..ecaeb987 100644 --- a/tests/unit/hykkt/HykktSpGEMMTests.hpp +++ b/tests/unit/hykkt/HykktSpGEMMTests.hpp @@ -62,6 +62,7 @@ namespace ReSolve if (memspace_ == memory::DEVICE) { + E->allocateMatrixData(memory::HOST); E->syncData(memory::HOST); } @@ -165,6 +166,10 @@ namespace ReSolve matrix::Csr* B = new matrix::Csr(n, n, nnz); matrix::Csr* D = new matrix::Csr(n, n, n - 2); + A->allocateMatrixData(memspace_); + B->allocateMatrixData(memspace_); + D->allocateMatrixData(memspace_); + A->copyFromExternal(A_row_ptr, A_col_ind, A_values, memory::HOST, memspace_); B->copyFromExternal(B_row_ptr, B_col_ind, B_values, memory::HOST, memspace_); D->copyFromExternal(D_row_ptr, D_col_ind, D_values, memory::HOST, memspace_); @@ -178,6 +183,7 @@ namespace ReSolve if (memspace_ == memory::DEVICE) { + E->allocateMatrixData(memory::HOST); E->syncData(memory::HOST); } @@ -276,8 +282,10 @@ namespace ReSolve if (memspace_ == memory::DEVICE) { + E->allocateMatrixData(memory::HOST); + A->allocateMatrixData(memory::HOST); + D->allocateMatrixData(memory::HOST); E->syncData(memory::HOST); - A->syncData(memory::HOST); D->syncData(memory::HOST); } @@ -341,6 +349,10 @@ namespace ReSolve *B = new matrix::Csr(3, 3, 5); *D = new matrix::Csr(3, 3, 6); + (*A)->allocateMatrixData(memspace_); + (*B)->allocateMatrixData(memspace_); + (*D)->allocateMatrixData(memspace_); + (*A)->copyFromExternal(A_row_ptr, A_col_ind, A_values, memory::HOST, memspace_); (*B)->copyFromExternal(B_row_ptr, B_col_ind, B_values, memory::HOST, memspace_); (*D)->copyFromExternal(D_row_ptr, D_col_ind, D_values, memory::HOST, memspace_); diff --git a/tests/unit/matrix/LUSOLTests.hpp b/tests/unit/matrix/LUSOLTests.hpp index 12c05f10..d03d7b6d 100644 --- a/tests/unit/matrix/LUSOLTests.hpp +++ b/tests/unit/matrix/LUSOLTests.hpp @@ -51,6 +51,7 @@ namespace ReSolve matrix::Coo* A = createMatrix(); vector::Vector rhs(A->getNumRows()); + rhs.allocate(memory::HOST); rhs.setToConst(constants::ONE, memory::HOST); vector::Vector x(A->getNumColumns()); @@ -80,6 +81,7 @@ namespace ReSolve matrix::Coo* A = createMatrix(); vector::Vector rhs(A->getNumRows()); + rhs.allocate(memory::HOST); rhs.setToConst(constants::ONE, memory::HOST); vector::Vector x(A->getNumColumns()); @@ -113,6 +115,7 @@ namespace ReSolve matrix::Coo* A = createMatrix(); vector::Vector rhs(A->getNumRows()); + rhs.allocate(memory::HOST); rhs.setToConst(constants::ONE, memory::HOST); vector::Vector x(A->getNumColumns()); @@ -199,6 +202,7 @@ namespace ReSolve // NOTE: these are hardcoded for now index_type size = static_cast(valsA_.size()); matrix::Coo* A = new matrix::Coo(9, 9, size, true, true); + A->allocateMatrixData(memory::HOST); A->copyFromExternal(rowsA_.data(), colsA_.data(), valsA_.data(), diff --git a/tests/unit/matrix/MatrixFactorizationTests.hpp b/tests/unit/matrix/MatrixFactorizationTests.hpp index d3553920..5911106c 100644 --- a/tests/unit/matrix/MatrixFactorizationTests.hpp +++ b/tests/unit/matrix/MatrixFactorizationTests.hpp @@ -60,6 +60,7 @@ namespace ReSolve ReSolve::matrix::Csr* A = createCsrMatrix(0, "cpu"); ReSolve::vector::Vector rhs(A->getNumRows()); + rhs.allocate(memory::HOST); rhs.setToConst(constants::ONE, memory::HOST); ReSolve::vector::Vector x(A->getNumRows()); @@ -167,6 +168,7 @@ namespace ReSolve if ((memspace == "cuda") || (memspace == "hip")) { + A->allocateMatrixData(memory::DEVICE); A->syncData(memory::DEVICE); } @@ -348,6 +350,7 @@ namespace ReSolve bool status = true; if (memspace != "cpu") { + x.allocate(memory::DEVICE); x.syncData(memory::HOST); } diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 063e64d7..8e487d29 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -101,7 +101,7 @@ namespace ReSolve matrix::Csc* A_csc = createRectangularCscMatrix(n, m); matrix::Csr* A_csr = new matrix::Csr(m, n, A_csc->getNnz()); - A_csr->allocateMatrixData(memspace_); + A_csr->allocateAll(memspace_); handler_.csc2csr(A_csc, A_csr, memspace_); @@ -131,34 +131,29 @@ namespace ReSolve matrix_size << " for " << n << " x " << m << " matrix"; testname += matrix_size.str(); + matrix::Csr* A = createRectangularCsrMatrix(n, m); matrix::Csr* At = new matrix::Csr(m, n, 2 * std::min(n, m)); - matrix::Csr* A = nullptr; // Declare A outside + At->allocateAll(memspace_); - for (real_type val = 0.0; val <= 1.0; val += 1.0) - { // Use a step to prevent infinite loop - if (val == 0.0) - { - A = createRectangularCsrMatrix(n, m); - At->allocateMatrixData(memspace_); - handler_.transpose(A, At, memspace_); + handler_.transpose(A, At, memspace_); + status *= (At->getNumRows() == A->getNumColumns()); + status *= (At->getNumColumns() == A->getNumRows()); + status *= (At->getNnz() == A->getNnz()); - status *= (At->getNumRows() == A->getNumColumns()); - status *= (At->getNumColumns() == A->getNumRows()); - status *= (At->getNnz() == A->getNnz()); - } - else - { - handler_.addConst(A, val, memspace_); - handler_.transpose(A, At, memspace_); - } + if (memspace_ == memory::DEVICE) + { + At->syncData(memory::HOST); + } + verifyCsrMatrix(At, 0.0); - if (memspace_ == memory::DEVICE) - { - At->syncData(memory::HOST); - } + handler_.addConst(A, 1.0, memspace_); + handler_.transpose(A, At, memspace_); - verifyCsrMatrix(At, val); + if (memspace_ == memory::DEVICE) + { + At->syncData(memory::HOST); } + verifyCsrMatrix(At, 1.0); delete A; // Delete after loop delete At; @@ -308,7 +303,8 @@ namespace ReSolve A->setUpdated(memory::HOST); if (memspace_ == memory::DEVICE) { - A->syncData(memspace_); + A->allocateMatrixData(memory::DEVICE); + A->syncData(memory::DEVICE); } return A; } @@ -395,6 +391,7 @@ namespace ReSolve A->setUpdated(memory::HOST); if (memspace_ == memory::DEVICE) { + A->allocateMatrixData(memspace_); A->syncData(memspace_); } return A; @@ -573,7 +570,8 @@ namespace ReSolve if (memspace_ == memory::DEVICE) { - A->syncData(memspace_); + A->allocateMatrixData(memory::DEVICE); + A->syncData(memory::DEVICE); } return A; diff --git a/tests/unit/preconditioner/PreconditionerLUTests.hpp b/tests/unit/preconditioner/PreconditionerLUTests.hpp index 7f415e81..bd6cc7a8 100644 --- a/tests/unit/preconditioner/PreconditionerLUTests.hpp +++ b/tests/unit/preconditioner/PreconditionerLUTests.hpp @@ -55,6 +55,7 @@ namespace ReSolve index_type col_data[3] = {0, 1, 2}; real_type val_data[3] = {4.0, 5.0, 6.0}; + A->allocateAll(memspace_); A->copyFromExternal(row_data, col_data, val_data, memory::HOST, memory::HOST); if (memspace_ == memory::DEVICE) @@ -67,10 +68,11 @@ namespace ReSolve real_type rhs_data[3] = {4.0, 10.0, 18.0}; vector::Vector* rhs = new vector::Vector(n); + rhs->allocate(memspace_); rhs->copyFromExternal(rhs_data, memory::HOST, memspace_); vector::Vector* x = new vector::Vector(n); - x->allocate(memspace_); + x->allocateAll(memspace_); precond.apply(rhs, x); diff --git a/tests/unit/preconditioner/PreconditionerUserMatrixTests.hpp b/tests/unit/preconditioner/PreconditionerUserMatrixTests.hpp index e79267a6..fa207142 100644 --- a/tests/unit/preconditioner/PreconditionerUserMatrixTests.hpp +++ b/tests/unit/preconditioner/PreconditionerUserMatrixTests.hpp @@ -113,17 +113,16 @@ namespace ReSolve index_type col_data[3] = {0, 1, 2}; real_type val_data[3] = {2.0, 3.0, 4.0}; + B->allocateAll(memspace_); B->copyFromExternal(row_data, col_data, val_data, memory::HOST, memory::HOST); if (memspace_ == memory::DEVICE) { - B->allocateMatrixData(memspace_); B->syncData(memspace_); } vector::Vector x(n); - x.allocate(memory::HOST); - x.allocate(memspace_); + x.allocateAll(memspace_); for (index_type i = 0; i < n; ++i) { @@ -138,8 +137,7 @@ namespace ReSolve } vector::Vector y(n); - y.allocate(memory::HOST); - y.allocate(memspace_); + y.allocateAll(memspace_); y.setToZero(memspace_); PreconditionerUserMatrix precond(&handler_); diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 9f71eede..a63807dc 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -79,10 +79,12 @@ namespace ReSolve real_type* H = new real_type[restart * (restart + 1)]; // Allocate Krylov subspace - V.allocate(memspace_); + V.allocateAll(memspace_); + ; + V.setDataUpdated(memspace_); if (memspace_ == memory::DEVICE) { - V.allocate(memory::HOST); + V.setDataUpdated(memory::HOST); } // Create and allocate Gram-Schmidt orthogonalization @@ -128,6 +130,10 @@ namespace ReSolve handler_.scal(nrm, &V, memspace_); // Orthogonalize system and verify result + if (memspace_ == memory::DEVICE) + { + V.syncData(memory::HOST); + } GS.orthogonalize(N, &V, H, 0); GS.orthogonalize(N, &V, H, 1); status *= verifyAnswer(V, restart + 1); @@ -145,7 +151,9 @@ namespace ReSolve bool verifyAnswer(vector::Vector& x, index_type K) { vector::Vector a(x.getSize()); + a.allocate(memory::HOST); vector::Vector b(x.getSize()); + b.allocate(memory::HOST); real_type ip; bool status = true; diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 4f728abd..2c67ad9c 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -62,6 +62,7 @@ namespace ReSolve { data[i] = 0.1 * (real_type) i; } + x.allocate(memspace_); x.copyFromExternal(data, memory::HOST, memspace_); real_type result = handler_.amax(&x, memspace_); @@ -249,7 +250,7 @@ namespace ReSolve // diag[i] = i, vec[i] = 3.0 // expected result vec[i] = i * 3.0 diag.allocate(memspace_); - vec.allocate(memspace_); + vec.allocateAll(memspace_); vec.setToConst(3.0, memspace_); @@ -332,8 +333,8 @@ namespace ReSolve vector::Vector z(N); x.allocate(memspace_); - y.allocate(memspace_); - z.allocate(memspace_); + y.allocateAll(memspace_); + z.allocateAll(memspace_); auto x_data = std::unique_ptr(new real_type[N]); auto y_data = std::unique_ptr(new real_type[N]); @@ -359,6 +360,7 @@ namespace ReSolve if (memspace_ == memory::DEVICE) { y.syncData(memory::HOST); + z.syncData(memory::HOST); } for (index_type i = 0; i < N; ++i) @@ -390,8 +392,8 @@ namespace ReSolve vector::Vector x(N); vector::Vector y(N); - x.allocate(memspace_); - y.allocate(memspace_); + x.allocateAll(memspace_); + y.allocateAll(memspace_); auto x_data = std::unique_ptr(new real_type[N]); for (size_t i = 0; i < static_cast(N); ++i) diff --git a/tests/unit/vector/VectorTests.hpp b/tests/unit/vector/VectorTests.hpp index c9589fa4..3bf30256 100644 --- a/tests/unit/vector/VectorTests.hpp +++ b/tests/unit/vector/VectorTests.hpp @@ -177,14 +177,17 @@ namespace ReSolve } // array -> memspace + x.allocate(memspace_); x.copyFromExternal(data, memory::HOST, memspace_); // memspace -> memspace vector::Vector y(N); + y.allocate(memspace_); y.copyFromExternal(&x, memspace_, memspace_); // memspace -> host vector::Vector z(N); + z.allocate(memory::HOST); z.copyFromExternal(&y, memspace_, memory::HOST); const real_type* z_data = z.getData(memory::HOST); @@ -291,6 +294,7 @@ namespace ReSolve vector::Vector x(vector_size, number_vectors); + x.allocate(memspace_); x.setToZero(memspace_); success *= verifyAnswer(x, ZERO); @@ -336,6 +340,8 @@ namespace ReSolve index_type number_vectors = 3; vector::Vector x(vector_size, number_vectors); + x.allocate(memory::HOST); + x.allocate(memory::DEVICE); // Set all vectors in x on device to ones x.setToConst(ONE, memspace_);