diff --git a/source/source_cell/module_neighlist/bin_manager.cpp b/source/source_cell/module_neighlist/bin_manager.cpp index 2e64b49a881..b1182f0f1bd 100644 --- a/source/source_cell/module_neighlist/bin_manager.cpp +++ b/source/source_cell/module_neighlist/bin_manager.cpp @@ -118,14 +118,15 @@ void BinManager::build_atom_neighbors( { assert(atoms.size() == neighbor_list.numneigh.size()); - double sradius2 = sradius * sradius; + const double sradius2 = sradius * sradius; + const int natoms = static_cast(atoms.size()); neighbor_list.reset(); + std::fill(neighbor_list.numneigh.begin(), neighbor_list.numneigh.end(), 0); + std::fill(neighbor_list.firstneigh.begin(), neighbor_list.firstneigh.end(), nullptr); - for (int i = 0; i < atoms.size(); i++) + auto collect_neighbors = [&](const int i, std::vector& neigh_tmp) { - std::vector neigh_tmp; - int ix = std::min( std::max(int((atoms[i].position_x - x_min) / bin_sizex), 0), nbinx - 1 @@ -173,21 +174,41 @@ void BinManager::build_atom_neighbors( } } } - } - int n = neigh_tmp.size(); - - //std::cout<> chunk_neighbor_ids(chunk_len); + +#ifdef _OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int local_i = 0; local_i < chunk_len; local_i++) { - assert(ptr != nullptr); - ptr[k] = neigh_tmp[k]; + collect_neighbors(chunk_begin + local_i, chunk_neighbor_ids[local_i]); } - neighbor_list.firstneigh[i] = ptr; - neighbor_list.numneigh[i] = n; + for (int local_i = 0; local_i < chunk_len; local_i++) + { + const int atom_i = chunk_begin + local_i; + const std::vector& neigh_tmp = chunk_neighbor_ids[local_i]; + const int n = static_cast(neigh_tmp.size()); + + int* ptr = neighbor_list.allocator.allocate(n); + assert(n == 0 || ptr != nullptr); + + for (int k = 0; k < n; k++) + { + ptr[k] = neigh_tmp[k]; + } + + neighbor_list.firstneigh[atom_i] = ptr; + neighbor_list.numneigh[atom_i] = n; + } } } @@ -199,4 +220,4 @@ void BinManager::clear() } bins.clear(); -} \ No newline at end of file +} diff --git a/source/source_cell/module_neighlist/neighbor_list.h b/source/source_cell/module_neighlist/neighbor_list.h index 9ed0a4dc409..c96b5bdb5f6 100644 --- a/source/source_cell/module_neighlist/neighbor_list.h +++ b/source/source_cell/module_neighlist/neighbor_list.h @@ -4,6 +4,7 @@ #include #include #include +#include class PageAllocator { public: @@ -20,11 +21,11 @@ class PageAllocator { // Default constructor PageAllocator() : pgsize(DEFAULT_PGSIZE) { - if (pgsize > 0) new_page(); + if (pgsize > 0) new_page_unlocked(); } PageAllocator(int pgsize_) : pgsize(pgsize_) { - new_page(); + new_page_unlocked(); } ~PageAllocator() { @@ -34,30 +35,42 @@ class PageAllocator { PageAllocator(const PageAllocator&) = delete; PageAllocator& operator=(const PageAllocator&) = delete; - // Allow move - PageAllocator(PageAllocator&&) = default; - PageAllocator& operator=(PageAllocator&&) = default; + // Allow move; mutex state is not moved. + PageAllocator(PageAllocator&& other) noexcept { + std::lock_guard lock(other.mutex_); + pages = std::move(other.pages); + pgsize = other.pgsize; + } + + PageAllocator& operator=(PageAllocator&& other) noexcept { + if (this != &other) { + std::lock(mutex_, other.mutex_); + std::lock_guard lock_this(mutex_, std::adopt_lock); + std::lock_guard lock_other(other.mutex_, std::adopt_lock); + pages = std::move(other.pages); + pgsize = other.pgsize; + } + return *this; + } void new_page() { - Page p; - p.capacity = pgsize; - p.offset = 0; - p.data.resize(pgsize); - pages.push_back(std::move(p)); + std::lock_guard lock(mutex_); + new_page_unlocked(); } int* allocate(int n) { + std::lock_guard lock(mutex_); if (n <= 0) return nullptr; // reject requests larger than a single page if (n > pgsize) { std::cerr << "PageAllocator::allocate error: request " << n << " larger than page size " << pgsize << std::endl; return nullptr; } - if (pages.empty()) new_page(); + if (pages.empty()) new_page_unlocked(); Page& p = pages.back(); if (p.offset + n > p.capacity) { - new_page(); - return allocate(n); + new_page_unlocked(); + return allocate_unlocked(n); } int* ptr = p.data.data() + p.offset; p.offset += n; @@ -65,8 +78,29 @@ class PageAllocator { } void reset() { - pages.resize(1); - pages[0].offset = 0; + std::lock_guard lock(mutex_); + pages.clear(); + if (pgsize > 0) new_page_unlocked(); + } + +private: + std::mutex mutex_; + + void new_page_unlocked() { + Page p; + p.capacity = pgsize; + p.offset = 0; + p.data.resize(pgsize); + pages.push_back(std::move(p)); + } + + int* allocate_unlocked(int n) { + if (n <= 0) return nullptr; + Page& p = pages.back(); + assert(p.offset + n <= p.capacity); + int* ptr = p.data.data() + p.offset; + p.offset += n; + return ptr; } }; @@ -97,4 +131,4 @@ class NeighborList { void reset() { allocator.reset(); } -}; \ No newline at end of file +}; diff --git a/source/source_esolver/esolver_lj.cpp b/source/source_esolver/esolver_lj.cpp index 72db00a1db2..0a507c46781 100644 --- a/source/source_esolver/esolver_lj.cpp +++ b/source/source_esolver/esolver_lj.cpp @@ -57,45 +57,113 @@ void ESolver_LJ::before_all_runners(UnitCell& ucell, const Input_para& inp) void ESolver_LJ::runner(UnitCell& ucell, const int istep) { + ModuleBase::timer::start("ESolver_LJ", "runner"); + UnitCellPlus ucell_plus = change_from_ucell_to_ucell_plus(ucell); NeighborSearch neighbor_search; neighbor_search.init(ucell_plus, search_radius, 0); neighbor_search.build_neighbors(); - double distance = 0.0; - int index = 0; - // Important! potential, force, virial must be zero per step lj_potential = 0; lj_force.zero_out(); lj_virial.zero_out(); - ModuleBase::Vector3 tau1, tau2, dtau; - for (int it = 0; it < ucell.ntype; ++it) + const int nlocal = neighbor_search.neighbor_list.nlocal; + double potential_sum = 0.0; + double virial_sum[9] = {0.0}; + std::cout<<1<na; ++ia) + double potential_private = 0.0; + double virial_private[9] = {0.0}; + +#pragma omp for schedule(static) + for (int index = 0; index < nlocal; ++index) { - tau1 = atom1->tau[ia]; + const NeighborAtom& atom1 = neighbor_search.inside_atoms[index]; + const int it = atom1.atom_type; + ModuleBase::Vector3 tau1(atom1.position_x, atom1.position_y, atom1.position_z); + for (int ad = 0; ad < neighbor_search.neighbor_list.numneigh[index]; ++ad) { + ModuleBase::Vector3 tau2; tau2.x = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_x; tau2.y = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_y; tau2.z = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_z; int it2 = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].atom_type; - dtau = (tau1 - tau2) * ucell.lat0; - distance = dtau.norm(); + ModuleBase::Vector3 dtau = (tau1 - tau2) * ucell.lat0; + double distance = dtau.norm(); if (distance < lj_rcut(it, it2)) { - lj_potential += LJ_energy(distance, it, it2) - en_shift(it, it2); + potential_private += LJ_energy(distance, it, it2) - en_shift(it, it2); ModuleBase::Vector3 f_ij = LJ_force(dtau, it, it2); lj_force(index, 0) += f_ij.x; lj_force(index, 1) += f_ij.y; lj_force(index, 2) += f_ij.z; - LJ_virial(f_ij, dtau); + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + virial_private[i * 3 + j] += dtau[i] * f_ij[j]; + } + } } } - index++; + } + +#pragma omp critical + { + potential_sum += potential_private; + for (int i = 0; i < 9; ++i) + { + virial_sum[i] += virial_private[i]; + } + } + } +#else + for (int index = 0; index < nlocal; ++index) + { + const NeighborAtom& atom1 = neighbor_search.inside_atoms[index]; + const int it = atom1.atom_type; + ModuleBase::Vector3 tau1(atom1.position_x, atom1.position_y, atom1.position_z); + + for (int ad = 0; ad < neighbor_search.neighbor_list.numneigh[index]; ++ad) + { + ModuleBase::Vector3 tau2; + tau2.x = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_x; + tau2.y = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_y; + tau2.z = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_z; + int it2 = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].atom_type; + ModuleBase::Vector3 dtau = (tau1 - tau2) * ucell.lat0; + double distance = dtau.norm(); + if (distance < lj_rcut(it, it2)) + { + potential_sum += LJ_energy(distance, it, it2) - en_shift(it, it2); + ModuleBase::Vector3 f_ij = LJ_force(dtau, it, it2); + lj_force(index, 0) += f_ij.x; + lj_force(index, 1) += f_ij.y; + lj_force(index, 2) += f_ij.z; + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + virial_sum[i * 3 + j] += dtau[i] * f_ij[j]; + } + } + } + } + } +#endif + + lj_potential = potential_sum; + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + lj_virial(i, j) = virial_sum[i * 3 + j]; } } @@ -156,6 +224,8 @@ void ESolver_LJ::runner(UnitCell& ucell, const int istep) lj_virial(i, j) /= (2.0 * ucell.omega); } } + + ModuleBase::timer::end("ESolver_LJ", "runner"); } double ESolver_LJ::cal_energy()