From 8b19ac7747bf5109d205fdd0af3bf4e1fcac375e Mon Sep 17 00:00:00 2001 From: Fei Yang <2501213217@stu.pku.edu.cn> Date: Thu, 28 May 2026 17:12:51 +0800 Subject: [PATCH 1/3] Parallelize neighbor list construction with OpenMP --- .../module_neighlist/bin_manager.cpp | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/source/source_cell/module_neighlist/bin_manager.cpp b/source/source_cell/module_neighlist/bin_manager.cpp index 2e64b49a881..84cdaa25951 100644 --- a/source/source_cell/module_neighlist/bin_manager.cpp +++ b/source/source_cell/module_neighlist/bin_manager.cpp @@ -118,13 +118,19 @@ void BinManager::build_atom_neighbors( { assert(atoms.size() == neighbor_list.numneigh.size()); - double sradius2 = sradius * sradius; + const double sradius2 = sradius * sradius; + const int natoms = static_cast(atoms.size()); neighbor_list.reset(); - for (int i = 0; i < atoms.size(); i++) + std::vector> neighbor_ids(natoms); + +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic, 16) +#endif + for (int i = 0; i < natoms; i++) { - std::vector neigh_tmp; + std::vector& neigh_tmp = neighbor_ids[i]; int ix = std::min( std::max(int((atoms[i].position_x - x_min) / bin_sizex), 0), @@ -174,7 +180,12 @@ void BinManager::build_atom_neighbors( } } } - int n = neigh_tmp.size(); + } + + for (int i = 0; i < natoms; i++) + { + const std::vector& neigh_tmp = neighbor_ids[i]; + int n = static_cast(neigh_tmp.size()); //std::cout< Date: Tue, 2 Jun 2026 18:04:07 +0800 Subject: [PATCH 2/3] Parallelize neighbor list construction with OpenMP --- .../module_neighlist/bin_manager.cpp | 7 +- .../module_neighlist/neighbor_list.h | 66 +++++++++---- source/source_esolver/esolver_lj.cpp | 96 ++++++++++++++++--- 3 files changed, 138 insertions(+), 31 deletions(-) diff --git a/source/source_cell/module_neighlist/bin_manager.cpp b/source/source_cell/module_neighlist/bin_manager.cpp index 84cdaa25951..98ba374273e 100644 --- a/source/source_cell/module_neighlist/bin_manager.cpp +++ b/source/source_cell/module_neighlist/bin_manager.cpp @@ -126,7 +126,7 @@ void BinManager::build_atom_neighbors( std::vector> neighbor_ids(natoms); #ifdef _OPENMP -#pragma omp parallel for schedule(dynamic, 16) +#pragma omp parallel for schedule(static) #endif for (int i = 0; i < natoms; i++) { @@ -182,6 +182,9 @@ void BinManager::build_atom_neighbors( } } +#ifdef _OPENMP +#pragma omp parallel for schedule(static) +#endif for (int i = 0; i < natoms; i++) { const std::vector& neigh_tmp = neighbor_ids[i]; @@ -190,10 +193,10 @@ void BinManager::build_atom_neighbors( //std::cout< #include #include +#include class PageAllocator { public: @@ -20,11 +21,11 @@ class PageAllocator { // Default constructor PageAllocator() : pgsize(DEFAULT_PGSIZE) { - if (pgsize > 0) new_page(); + if (pgsize > 0) new_page_unlocked(); } PageAllocator(int pgsize_) : pgsize(pgsize_) { - new_page(); + new_page_unlocked(); } ~PageAllocator() { @@ -34,30 +35,42 @@ class PageAllocator { PageAllocator(const PageAllocator&) = delete; PageAllocator& operator=(const PageAllocator&) = delete; - // Allow move - PageAllocator(PageAllocator&&) = default; - PageAllocator& operator=(PageAllocator&&) = default; + // Allow move; mutex state is not moved. + PageAllocator(PageAllocator&& other) noexcept { + std::lock_guard lock(other.mutex_); + pages = std::move(other.pages); + pgsize = other.pgsize; + } + + PageAllocator& operator=(PageAllocator&& other) noexcept { + if (this != &other) { + std::lock(mutex_, other.mutex_); + std::lock_guard lock_this(mutex_, std::adopt_lock); + std::lock_guard lock_other(other.mutex_, std::adopt_lock); + pages = std::move(other.pages); + pgsize = other.pgsize; + } + return *this; + } void new_page() { - Page p; - p.capacity = pgsize; - p.offset = 0; - p.data.resize(pgsize); - pages.push_back(std::move(p)); + std::lock_guard lock(mutex_); + new_page_unlocked(); } int* allocate(int n) { + std::lock_guard lock(mutex_); if (n <= 0) return nullptr; // reject requests larger than a single page if (n > pgsize) { std::cerr << "PageAllocator::allocate error: request " << n << " larger than page size " << pgsize << std::endl; return nullptr; } - if (pages.empty()) new_page(); + if (pages.empty()) new_page_unlocked(); Page& p = pages.back(); if (p.offset + n > p.capacity) { - new_page(); - return allocate(n); + new_page_unlocked(); + return allocate_unlocked(n); } int* ptr = p.data.data() + p.offset; p.offset += n; @@ -65,8 +78,29 @@ class PageAllocator { } void reset() { - pages.resize(1); - pages[0].offset = 0; + std::lock_guard lock(mutex_); + pages.clear(); + if (pgsize > 0) new_page_unlocked(); + } + +private: + std::mutex mutex_; + + void new_page_unlocked() { + Page p; + p.capacity = pgsize; + p.offset = 0; + p.data.resize(pgsize); + pages.push_back(std::move(p)); + } + + int* allocate_unlocked(int n) { + if (n <= 0) return nullptr; + Page& p = pages.back(); + assert(p.offset + n <= p.capacity); + int* ptr = p.data.data() + p.offset; + p.offset += n; + return ptr; } }; @@ -97,4 +131,4 @@ class NeighborList { void reset() { allocator.reset(); } -}; \ No newline at end of file +}; diff --git a/source/source_esolver/esolver_lj.cpp b/source/source_esolver/esolver_lj.cpp index 72db00a1db2..0a507c46781 100644 --- a/source/source_esolver/esolver_lj.cpp +++ b/source/source_esolver/esolver_lj.cpp @@ -57,45 +57,113 @@ void ESolver_LJ::before_all_runners(UnitCell& ucell, const Input_para& inp) void ESolver_LJ::runner(UnitCell& ucell, const int istep) { + ModuleBase::timer::start("ESolver_LJ", "runner"); + UnitCellPlus ucell_plus = change_from_ucell_to_ucell_plus(ucell); NeighborSearch neighbor_search; neighbor_search.init(ucell_plus, search_radius, 0); neighbor_search.build_neighbors(); - double distance = 0.0; - int index = 0; - // Important! potential, force, virial must be zero per step lj_potential = 0; lj_force.zero_out(); lj_virial.zero_out(); - ModuleBase::Vector3 tau1, tau2, dtau; - for (int it = 0; it < ucell.ntype; ++it) + const int nlocal = neighbor_search.neighbor_list.nlocal; + double potential_sum = 0.0; + double virial_sum[9] = {0.0}; + std::cout<<1<na; ++ia) + double potential_private = 0.0; + double virial_private[9] = {0.0}; + +#pragma omp for schedule(static) + for (int index = 0; index < nlocal; ++index) { - tau1 = atom1->tau[ia]; + const NeighborAtom& atom1 = neighbor_search.inside_atoms[index]; + const int it = atom1.atom_type; + ModuleBase::Vector3 tau1(atom1.position_x, atom1.position_y, atom1.position_z); + for (int ad = 0; ad < neighbor_search.neighbor_list.numneigh[index]; ++ad) { + ModuleBase::Vector3 tau2; tau2.x = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_x; tau2.y = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_y; tau2.z = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_z; int it2 = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].atom_type; - dtau = (tau1 - tau2) * ucell.lat0; - distance = dtau.norm(); + ModuleBase::Vector3 dtau = (tau1 - tau2) * ucell.lat0; + double distance = dtau.norm(); if (distance < lj_rcut(it, it2)) { - lj_potential += LJ_energy(distance, it, it2) - en_shift(it, it2); + potential_private += LJ_energy(distance, it, it2) - en_shift(it, it2); ModuleBase::Vector3 f_ij = LJ_force(dtau, it, it2); lj_force(index, 0) += f_ij.x; lj_force(index, 1) += f_ij.y; lj_force(index, 2) += f_ij.z; - LJ_virial(f_ij, dtau); + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + virial_private[i * 3 + j] += dtau[i] * f_ij[j]; + } + } } } - index++; + } + +#pragma omp critical + { + potential_sum += potential_private; + for (int i = 0; i < 9; ++i) + { + virial_sum[i] += virial_private[i]; + } + } + } +#else + for (int index = 0; index < nlocal; ++index) + { + const NeighborAtom& atom1 = neighbor_search.inside_atoms[index]; + const int it = atom1.atom_type; + ModuleBase::Vector3 tau1(atom1.position_x, atom1.position_y, atom1.position_z); + + for (int ad = 0; ad < neighbor_search.neighbor_list.numneigh[index]; ++ad) + { + ModuleBase::Vector3 tau2; + tau2.x = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_x; + tau2.y = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_y; + tau2.z = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].position_z; + int it2 = neighbor_search.all_atoms[neighbor_search.neighbor_list.firstneigh[index][ad]].atom_type; + ModuleBase::Vector3 dtau = (tau1 - tau2) * ucell.lat0; + double distance = dtau.norm(); + if (distance < lj_rcut(it, it2)) + { + potential_sum += LJ_energy(distance, it, it2) - en_shift(it, it2); + ModuleBase::Vector3 f_ij = LJ_force(dtau, it, it2); + lj_force(index, 0) += f_ij.x; + lj_force(index, 1) += f_ij.y; + lj_force(index, 2) += f_ij.z; + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + virial_sum[i * 3 + j] += dtau[i] * f_ij[j]; + } + } + } + } + } +#endif + + lj_potential = potential_sum; + for (int i = 0; i < 3; ++i) + { + for (int j = 0; j < 3; ++j) + { + lj_virial(i, j) = virial_sum[i * 3 + j]; } } @@ -156,6 +224,8 @@ void ESolver_LJ::runner(UnitCell& ucell, const int istep) lj_virial(i, j) /= (2.0 * ucell.omega); } } + + ModuleBase::timer::end("ESolver_LJ", "runner"); } double ESolver_LJ::cal_energy() From de7493c874e103e028ef3c93eaf8639d7e250f2a Mon Sep 17 00:00:00 2001 From: Fei Yang <2501213217@stu.pku.edu.cn> Date: Tue, 2 Jun 2026 18:25:37 +0800 Subject: [PATCH 3/3] Parallelize neighbor list construction with OpenMP --- .../module_neighlist/bin_manager.cpp | 55 +++++++++++-------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/source/source_cell/module_neighlist/bin_manager.cpp b/source/source_cell/module_neighlist/bin_manager.cpp index 98ba374273e..b1182f0f1bd 100644 --- a/source/source_cell/module_neighlist/bin_manager.cpp +++ b/source/source_cell/module_neighlist/bin_manager.cpp @@ -122,16 +122,11 @@ void BinManager::build_atom_neighbors( const int natoms = static_cast(atoms.size()); neighbor_list.reset(); + std::fill(neighbor_list.numneigh.begin(), neighbor_list.numneigh.end(), 0); + std::fill(neighbor_list.firstneigh.begin(), neighbor_list.firstneigh.end(), nullptr); - std::vector> neighbor_ids(natoms); - -#ifdef _OPENMP -#pragma omp parallel for schedule(static) -#endif - for (int i = 0; i < natoms; i++) + auto collect_neighbors = [&](const int i, std::vector& neigh_tmp) { - std::vector& neigh_tmp = neighbor_ids[i]; - int ix = std::min( std::max(int((atoms[i].position_x - x_min) / bin_sizex), 0), nbinx - 1 @@ -179,29 +174,41 @@ void BinManager::build_atom_neighbors( } } } - } - } + } + }; + + constexpr int chunk_size = 256; + for (int chunk_begin = 0; chunk_begin < natoms; chunk_begin += chunk_size) + { + const int chunk_end = std::min(chunk_begin + chunk_size, natoms); + const int chunk_len = chunk_end - chunk_begin; + std::vector> chunk_neighbor_ids(chunk_len); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif - for (int i = 0; i < natoms; i++) - { - const std::vector& neigh_tmp = neighbor_ids[i]; - int n = static_cast(neigh_tmp.size()); - - //std::cout<& neigh_tmp = chunk_neighbor_ids[local_i]; + const int n = static_cast(neigh_tmp.size()); + + int* ptr = neighbor_list.allocator.allocate(n); + assert(n == 0 || ptr != nullptr); + + for (int k = 0; k < n; k++) + { + ptr[k] = neigh_tmp[k]; + } + + neighbor_list.firstneigh[atom_i] = ptr; + neighbor_list.numneigh[atom_i] = n; + } } }