From 091a7f5c97bad0364e3be76ed0832f74fe7c02e0 Mon Sep 17 00:00:00 2001 From: Michele Simoncelli Date: Mon, 19 Aug 2024 18:13:00 +0100 Subject: [PATCH 1/4] Fix implementation of Wigner thermal conductivity: - employ the smooth (Wallace) phase, which ensures size consistency (see https://link.aps.org/doi/10.1103/PhysRevX.12.041011 , in particular Sec VI); - consider the Wigner velocity operator also at q=0, a condition that is crucial to have a size-consistent formulation and describe the disordered-solid limit (see https://www.nature.com/articles/s41524-023-01033-4 ) . The current implementation is limited to the RTA level and outputs only the total Wigner conductivity, without decomposing it into population and coherences contributions. The decomposition between populations and coherences will be implemented in the future, along with the possibility to output the Wigner conductivity also with the full exact solution of the BTE. The code gives results compatible with phono3py. --- src/harmonic/phonon_h0.cpp | 130 ++++++------------ src/observable/wigner_phonon_thermal_cond.cpp | 12 +- 2 files changed, 53 insertions(+), 89 deletions(-) diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp index 45e50b27..355ed986 100644 --- a/src/harmonic/phonon_h0.cpp +++ b/src/harmonic/phonon_h0.cpp @@ -910,17 +910,30 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { auto energies = std::get<0>(tup); auto eigenvectors = std::get<1>(tup); + Eigen::MatrixXcd eigenvectors_bar(numBands, numBands); + Eigen::MatrixXcd eigenvectors_bar_minus(numBands, numBands); + Eigen::MatrixXcd eigenvectors_bar_plus(numBands, numBands); + eigenvectors_bar.setZero(); + eigenvectors_bar_minus.setZero(); + eigenvectors_bar_plus.setZero(); + + Eigen::VectorXcd matrix_U(numBands); + Eigen::VectorXcd matrix_U_plus(numBands); + Eigen::VectorXcd matrix_U_minus(numBands); + matrix_U.setZero(); + matrix_U_plus.setZero(); + matrix_U_minus.setZero(); + //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", energies(i)); //for(int i = 0; i < numBands; i++) // for(int j = 0; j < numBands; j++) // printf("old = %.16e %.16e\n", eigenvectors(i,j).real(), eigenvectors(i,j).imag()); - // now we compute the velocity operator, diagonalizing the expectation - // value of the derivative of the dynamical matrix. - // This works better than doing finite differences on the frequencies. + // now we compute the velocity operator double deltaQ = 1.0e-8; for (int i : {0, 1, 2}) { // define q+ and q- from finite differences. + Eigen::Vector3d qcenter = coordinates; Eigen::Vector3d qPlus = coordinates; Eigen::Vector3d qMinus = coordinates; qPlus(i) += deltaQ; @@ -940,11 +953,28 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enPlus(i)); //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enMinus(i)); //for(int i = 0; i < numBands; i++) - // for(int j = 0; j < numBands; j++) - // printf("old = %.16e %.16e\n", eigPlus(i,j).real(), eigPlus(i,j).imag()); - //for(int i = 0; i < numBands; i++) - // for(int j = 0; j < numBands; j++) - // printf("old = %.16e %.16e\n", eigMinus(i,j).real(), eigMinus(i,j).imag()); + + + for (int iat = 0; iat < numAtoms; iat++){ + for (int i : {0, 1, 2}) { + auto ip= i + iat*3; + double arg= qcenter.dot(atomicPositions.row(iat)); + matrix_U(ip)=exp(-complexI*arg); + arg= qPlus.dot(atomicPositions.row(iat)); + matrix_U_plus(ip)=exp(-complexI*arg); + arg= qMinus.dot(atomicPositions.row(iat)); + matrix_U_minus(ip)=exp(-complexI*arg); + } + } + + for(int i = 0; i < numBands; i++){ + for(int j = 0; j < numBands; j++){ + eigenvectors_bar(i,j)=matrix_U(i)*eigenvectors(i,j); + eigenvectors_bar_plus(i,j)=matrix_U_plus(i)*eigPlus(i,j); + eigenvectors_bar_minus(i,j)=matrix_U_minus(i)*eigMinus(i,j); + } + } + // build diagonal matrices with frequencies Eigen::MatrixXd enPlusMat(numBands, numBands); @@ -957,9 +987,9 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { // build the dynamical matrix at the two wavevectors // since we diagonalized it before, A = M.U.M* Eigen::MatrixXcd sqrtDPlus(numBands, numBands); - sqrtDPlus = eigPlus * enPlusMat * eigPlus.adjoint(); + sqrtDPlus = eigenvectors_bar_plus * enPlusMat * eigenvectors_bar_plus.adjoint(); Eigen::MatrixXcd sqrtDMinus(numBands, numBands); - sqrtDMinus = eigMinus * enMinusMat * eigMinus.adjoint(); + sqrtDMinus = eigenvectors_bar_minus * enMinusMat * eigenvectors_bar_minus.adjoint(); // now we can build the velocity operator Eigen::MatrixXcd der(numBands, numBands); @@ -984,89 +1014,19 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { // } //} - // and to be safe, we reimpose hermiticity - der = 0.5 * (der + der.adjoint()); - - // now we rotate in the basis of the eigenvectors at q. - der = eigenvectors.adjoint() * der * eigenvectors; + // option below can probably be decommented, this will enforce hermiticity numerically, + // results should be equivalent within numerical noise + //der = 0.5 * (der + der.adjoint()); + der = eigenvectors_bar.adjoint() * der * eigenvectors_bar; for (int ib2 = 0; ib2 < numBands; ib2++) { for (int ib1 = 0; ib1 < numBands; ib1++) { velocity(ib1, ib2, i) = der(ib1, ib2); - //auto x = velocity(ib1,ib2,i); - //printf("old = %.16e %.16e\n", x.real(), x.imag()); - } - } - } - - // turns out that the above algorithm has problems with degenerate bands - // so, we diagonalize the velocity operator in the degenerate subspace, - - for (int ib = 0; ib < numBands; ib++) { - // first, we check if the band is degenerate, and the size of the - // degenerate subspace - int sizeSubspace = 1; - for (int ib2 = ib + 1; ib2 < numBands; ib2++) { - // I consider bands degenerate if their frequencies are the same - // within 0.0001 cm^-1 - if (abs(energies(ib) - energies(ib2)) > 0.0001 / ryToCmm1) { - break; - } - sizeSubspace += 1; - } - - if (sizeSubspace > 1) { - Eigen::MatrixXcd subMat(sizeSubspace, sizeSubspace); - // we have to repeat for every direction - for (int iCart : {0, 1, 2}) { - // take the velocity matrix of the degenerate subspace - for (int j = 0; j < sizeSubspace; j++) { - for (int i = 0; i < sizeSubspace; i++) { - subMat(i, j) = velocity(ib + i, ib + j, iCart); - } - } - - // reinforce hermiticity - subMat = 0.5 * (subMat + subMat.adjoint()); - - // diagonalize the subMatrix - Eigen::SelfAdjointEigenSolver eigenSolver(subMat); - Eigen::MatrixXcd newEigenVectors = eigenSolver.eigenvectors(); - //newEigenVectors = 3*subMat; // TODO: undo - - // rotate the original matrix in the new basis - // that diagonalizes the subspace. - subMat = newEigenVectors.adjoint() * subMat * newEigenVectors; - - // reinforce hermiticity - subMat = 0.5 * (subMat + subMat.adjoint()); - - // substitute back - for (int j = 0; j < sizeSubspace; j++) { - for (int i = 0; i < sizeSubspace; i++) { - velocity(ib + i, ib + j, iCart) = subMat(i, j); - } - } } } - - // we skip the bands in the subspace, since we corrected them already - ib += sizeSubspace - 1; - } - // if we are working at gamma, we set all velocities to zero. - if (coordinates.norm() < 1.0e-6) { - velocity.setZero(); } - //for(int i = 0; i < 3; i++){ - // for (int ib2 = 0; ib2 < numBands; ib2++) { - // for (int ib1 = 0; ib1 < numBands; ib1++) { - // auto x = velocity(ib1,ib2,i); - // printf("old = %.16e %.16e\n", x.real(), x.imag()); - // } - // } - //} - Kokkos::Profiling::popRegion(); return velocity; + Kokkos::Profiling::popRegion(); } Eigen::Vector3i PhononH0::getCoarseGrid() { return qCoarseGrid; } diff --git a/src/observable/wigner_phonon_thermal_cond.cpp b/src/observable/wigner_phonon_thermal_cond.cpp index c2badcdc..2d5d9f13 100644 --- a/src/observable/wigner_phonon_thermal_cond.cpp +++ b/src/observable/wigner_phonon_thermal_cond.cpp @@ -83,10 +83,6 @@ WignerPhononThermalConductivity::WignerPhononThermalConductivity( // calculate wigner correction for (int ib1 = 0; ib1 < numBands; ib1++) { for (int ib2 = 0; ib2 < numBands; ib2++) { - if (ib1 == ib2) { - continue; - } - int is1 = bandStructure.getIndex(iqIdx, BandIndex(ib1)); int is2 = bandStructure.getIndex(iqIdx, BandIndex(ib2)); auto is1Idx = StateIndex(is1); @@ -101,6 +97,9 @@ WignerPhononThermalConductivity::WignerPhononThermalConductivity( double num = energies(ib1) * bose(iCalc, ib1) * (bose(iCalc, ib1) + 1.) + energies(ib2) * bose(iCalc, ib2) * (bose(iCalc, ib2) + 1.); + + if ( (bose(iCalc, ib1) <= 0.) || (bose(iCalc, ib2) <=0.0) ) continue; //this avoids numerical problems for the acoustic phonons + double vel = (velRot(ib1, ib2, ic1) * velRot(ib2, ib1, ic2)).real(); double den = 4. * pow(energies(ib1) - energies(ib2), 2) + @@ -142,6 +141,9 @@ WignerPhononThermalConductivity &WignerPhononThermalConductivity::operator=( void WignerPhononThermalConductivity::calcFromPopulation(VectorBTE &n) { PhononThermalConductivity::calcFromPopulation(n); + // TO DO: here we use setZero because we include the diagonal elements when computing the Wigner conductivity + // in other words, the Wigner conductivity includes both populations and coherences contributions + tensordxd.setZero(); tensordxd += wignerCorrection; } @@ -149,6 +151,7 @@ void WignerPhononThermalConductivity::calcVariational(VectorBTE &af, VectorBTE &f, VectorBTE &scalingCG) { PhononThermalConductivity::calcVariational(af, f, scalingCG); + Error("Developer error: Wigner conductivity is currently limited to RTA approximation, implementation with iterative will be done in the future"); tensordxd += wignerCorrection; } @@ -159,6 +162,7 @@ void WignerPhononThermalConductivity::calcFromRelaxons( PhononThermalConductivity::calcFromRelaxons(context, statisticsSweep, eigenvectors, scatteringMatrix, eigenvalues); + Error("Developer error: Wigner conductivity is currently limited to RTA approximation, implementation with iterative will be done in the future"); tensordxd += wignerCorrection; } From 56df4d63498c75d8feb3e9bc658e5c78b4343ef5 Mon Sep 17 00:00:00 2001 From: jcoulter12 Date: Fri, 23 Aug 2024 13:34:26 -0400 Subject: [PATCH 2/4] fix placement of kokkos profiling region to before return statement [ci skip] --- src/harmonic/phonon_h0.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp index 355ed986..73c65d7a 100644 --- a/src/harmonic/phonon_h0.cpp +++ b/src/harmonic/phonon_h0.cpp @@ -1025,8 +1025,8 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { } } } - return velocity; Kokkos::Profiling::popRegion(); + return velocity; } Eigen::Vector3i PhononH0::getCoarseGrid() { return qCoarseGrid; } From 56ff4656b85831df12eaeac58c8c9f2a8c7c2d8f Mon Sep 17 00:00:00 2001 From: Jenny Coulter Date: Thu, 5 Sep 2024 11:49:01 -0400 Subject: [PATCH 3/4] [ci skip] push a draft of the kokkos version of wallace phase implementation, remove many old unused print statements --- src/harmonic/phonon_h0.cpp | 61 ++++----------- src/harmonic/phonon_h0_kokkos.cpp | 126 ++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 88 deletions(-) diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp index 73c65d7a..da19af77 100644 --- a/src/harmonic/phonon_h0.cpp +++ b/src/harmonic/phonon_h0.cpp @@ -892,6 +892,8 @@ PhononH0::diagonalizeVelocity(Point &point) { return diagonalizeVelocityFromCoordinates(coordinates); } +// TODO I think it would be easier to +// apply these phases to the dynmat during the transform Eigen::Tensor, 3> PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { @@ -901,9 +903,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { velocity.setZero(); bool withMassScaling = false; - //for(int i = 0; i < 3; i++){ - // printf("old = %.16e\n", coordinates(i)); - //} // get the eigenvectors and the energies of the q-point auto tup = diagonalizeFromCoordinates(coordinates, withMassScaling); @@ -924,14 +923,11 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { matrix_U_plus.setZero(); matrix_U_minus.setZero(); - //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", energies(i)); - //for(int i = 0; i < numBands; i++) - // for(int j = 0; j < numBands; j++) - // printf("old = %.16e %.16e\n", eigenvectors(i,j).real(), eigenvectors(i,j).imag()); - // now we compute the velocity operator double deltaQ = 1.0e-8; + // kx, ky, kz directions for (int i : {0, 1, 2}) { + // define q+ and q- from finite differences. Eigen::Vector3d qcenter = coordinates; Eigen::Vector3d qPlus = coordinates; @@ -939,10 +935,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { qPlus(i) += deltaQ; qMinus(i) -= deltaQ; - //for(int i = 0; i < 3; i++){ - // printf("old = %.16e %.16e\n", qPlus(i), qMinus(i)); - //} - // diagonalize the dynamical matrix at q+ and q- auto tup2 = diagonalizeFromCoordinates(qPlus, withMassScaling); auto enPlus = std::get<0>(tup2); @@ -950,32 +942,28 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { auto tup1 = diagonalizeFromCoordinates(qMinus, withMassScaling); auto enMinus = std::get<0>(tup1); auto eigMinus = std::get<1>(tup1); - //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enPlus(i)); - //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enMinus(i)); - //for(int i = 0; i < numBands; i++) - + // apply the Wallace phase for (int iat = 0; iat < numAtoms; iat++){ for (int i : {0, 1, 2}) { - auto ip= i + iat*3; - double arg= qcenter.dot(atomicPositions.row(iat)); - matrix_U(ip)=exp(-complexI*arg); - arg= qPlus.dot(atomicPositions.row(iat)); - matrix_U_plus(ip)=exp(-complexI*arg); - arg= qMinus.dot(atomicPositions.row(iat)); - matrix_U_minus(ip)=exp(-complexI*arg); + auto ip = i + iat * 3; + double arg = qcenter.dot(atomicPositions.row(iat)); + matrix_U(ip) = exp(-complexI*arg); + arg = qPlus.dot(atomicPositions.row(iat)); + matrix_U_plus(ip) = exp(-complexI*arg); + arg = qMinus.dot(atomicPositions.row(iat)); + matrix_U_minus(ip) = exp(-complexI*arg); } } for(int i = 0; i < numBands; i++){ for(int j = 0; j < numBands; j++){ - eigenvectors_bar(i,j)=matrix_U(i)*eigenvectors(i,j); - eigenvectors_bar_plus(i,j)=matrix_U_plus(i)*eigPlus(i,j); - eigenvectors_bar_minus(i,j)=matrix_U_minus(i)*eigMinus(i,j); + eigenvectors_bar(i,j) = matrix_U(i) * eigenvectors(i,j); + eigenvectors_bar_plus(i,j) = matrix_U_plus(i) * eigPlus(i,j); + eigenvectors_bar_minus(i,j) = matrix_U_minus(i) * eigMinus(i,j); } } - // build diagonal matrices with frequencies Eigen::MatrixXd enPlusMat(numBands, numBands); Eigen::MatrixXd enMinusMat(numBands, numBands); @@ -995,25 +983,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) { Eigen::MatrixXcd der(numBands, numBands); der = (sqrtDPlus - sqrtDMinus) / (2. * deltaQ); - //for(int i = 0; i < numBands; i++){ - // for(int j = 0; j < numBands; j++){ - // auto x = der(i,j); - // printf("old = %.16e %.16e\n", x.real(), x.imag()); - // } - //} - //for(int i = 0; i < numBands; i++){ - // for(int j = 0; j < numBands; j++){ - // auto x = sqrtDPlus(i,j); - // printf("old = %.16e %.16e\n", x.real(), x.imag()); - // } - //} - //for(int i = 0; i < numBands; i++){ - // for(int j = 0; j < numBands; j++){ - // auto x = sqrtDMinus(i,j); - // printf("old = %.16e %.16e\n", x.real(), x.imag()); - // } - //} - // option below can probably be decommented, this will enforce hermiticity numerically, // results should be equivalent within numerical noise //der = 0.5 * (der + der.adjoint()); diff --git a/src/harmonic/phonon_h0_kokkos.cpp b/src/harmonic/phonon_h0_kokkos.cpp index c697af05..5cf5592c 100644 --- a/src/harmonic/phonon_h0_kokkos.cpp +++ b/src/harmonic/phonon_h0_kokkos.cpp @@ -45,7 +45,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian( dynamicalMatrices(iK, m, n) = tmp; }); Kokkos::realloc(phases_d, 0, 0); - //print3DComplex("new = ", dynamicalMatrices); if (hasDielectric) { auto longRangeCorrection1_d = this->longRangeCorrection1_d; @@ -137,7 +136,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian( } }); }); - //print3DComplex("new = ", dynamicalMatrices); } @@ -167,7 +165,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian( DD(iK, m, n) /= sqrt(atomicMasses_d(m) * atomicMasses_d(n)); }); - //print3DComplex("new = ", DD); return DD; } @@ -180,7 +177,6 @@ std::tuple PhononH0::kokkosBatchedDiagonaliz // create the Hamiltonians StridedComplexView3D dynamicalMatrices = kokkosBatchedBuildBlochHamiltonian(cartesianCoordinates); - //print3DComplex("new = ", dynamicalMatrices); int numK = dynamicalMatrices.extent(0); int numBands = this->numBands; @@ -199,8 +195,6 @@ std::tuple PhononH0::kokkosBatchedDiagonaliz frequencies(iK, m) = -sqrt(-frequencies(iK, m)); } }); - //print2D("new = ", frequencies); - //print3DComplex("new = ", dynamicalMatrices); if(withMassScaling) kokkosBatchedScaleEigenvectors(dynamicalMatrices); return std::make_tuple(frequencies, dynamicalMatrices); @@ -386,15 +380,19 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities( int numK = cartesianCoordinates.extent(0); double delta = 1.0e-8; + Kokkos::complex complexI(0.0, 1.0); // prepare all the wavevectors at which we need the hamiltonian, // (kx, ky, kz), (kx+dk, ky, kz), (kx - dk, ky, kz), (kx, ky + dk, kz), etc. + // 7 here is for the seven possible k+/- values DoubleView2D allVectors("wavevectors_k", numK * 7, 3); Kokkos::parallel_for( - "elHamiltonianShifted_d", numK, KOKKOS_LAMBDA(int iK) { + "phHamiltonianShifted_d", numK, KOKKOS_LAMBDA(int iK) { + // central points, i = x,y,z for (int i = 0; i < 3; ++i) { allVectors(iK * 7, i) = cartesianCoordinates(iK, i); } + // iDir for each kx, ky, kz +/-, i = x,y,z for (int iDir = 0; iDir < 3; ++iDir) { for (int i = 0; i < 3; ++i) { allVectors(iK * 7 + iDir * 2 + 1, i) = cartesianCoordinates(iK, i); @@ -407,99 +405,133 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities( } }); - //print2D("new = ", allVectors); - // compute the electronic properties at all wavevectors auto t = kokkosBatchedDiagonalizeFromCoordinates(allVectors, false); DoubleView2D allEnergies = std::get<0>(t); StridedComplexView3D allEigenvectors = std::get<1>(t); - //print2D("new = ", allEnergies); - int numBands = allEnergies.extent(1); - //print2D("new = ", allEnergies); - //print3DComplex("new = ", allEigenvectors); - Kokkos::LayoutStride eigenvectorLayout( numK, numBands*numBands, numBands, 1, numBands, numBands); - // save energies and eigenvectors to results - DoubleView2D resultEnergies("energies", numK, numBands); StridedComplexView3D resultEigenvectors("eigenvectors", eigenvectorLayout); ComplexView4D resultVelocities("velocities", numK, numBands, numBands, 3); + ComplexView2D phases("wallacePhases", numK, numBands); + // copy slice of eigenvectors, energies into views + Kokkos::parallel_for( + "phase", Range3D({0, 0, 0}, {numK, numAtoms, 3}), KOKKOS_LAMBDA(int iK, int iat, int i) { + // prepare the Wallace phase + int m = i + iat * 3; // generate the band index + auto R = atomicPositions.row(iat); // TODO need atomic positions as a view... + + // i for each kx, ky, kz +/-, j = x,y,z + double arg = 0.0; + for(int j = 0; j < 3; ++j) { + arg += R(j) * allVectors(iK * 7 + i * 2 + 1, j); + } + phases(iK,m) = exp( -complexI * arg ); + }); + // apply phases -- here we use the results eigenvectors container + // to hold phase * U(k), to be replaced with standard ones before return Kokkos::parallel_for( "der", Range2D({0, 0}, {numK, numBands}), KOKKOS_LAMBDA(int iK, int m) { - auto E = Kokkos::subview(allEnergies, iK * 7, Kokkos::ALL); + auto X = Kokkos::subview(allEigenvectors, iK * 7, Kokkos::ALL, Kokkos::ALL); - resultEnergies(iK, m) = E(m); + + // copy resultEigenvectors to return at the end for (int n=0; n x(0.,0.); - //Kokkos::complex p(0.,0.); - //Kokkos::complex pm(0.,0.); for (int l=0; l tmp(0.,0.); for (int l = 0; l < numBands; ++l) { - tmp += Kokkos::conj(L(l,m)) * 0.5 * (R(l, n) + Kokkos::conj(R(n, l))); + // average (der^* + der / 2) to enforce Hermiticity + //tmp += Kokkos::conj(L(l,m)) * 0.5 * (R(l, n) + Kokkos::conj(R(n, l))); + tmp += Kokkos::conj(U(l,m)) * dER(l, n); } - A(m, n) = tmp; + tempV(m, n) = tmp; }); Kokkos::fence(); + // then applying v_final = v1 * U(k) Kokkos::parallel_for( "vel", Range3D({0, 0, 0}, {numK, numBands, numBands}), KOKKOS_LAMBDA(int iK, int m, int n) { double norm = 0.; + // TODO why skip gamma here but not above? for (int i=0; i<3; ++i) { norm += cartesianCoordinates(iK,i) * cartesianCoordinates(iK,i); } @@ -517,14 +549,24 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities( Kokkos::resize(der, 0, 0, 0); Kokkos::resize(tmpV, 0, 0, 0); - //print4DComplex("new = ", resultVelocities); + // copy resultEigenvectors and energies to return + DoubleView2D resultEnergies("energies", numK, numBands); + Kokkos::parallel_for( + "copyEigenvectors", Range2D({0, 0}, {numK, numBands}), KOKKOS_LAMBDA(int iK, int m) { + auto E = Kokkos::subview(allEnergies, iK * 7, Kokkos::ALL); + auto X = Kokkos::subview(allEigenvectors, iK * 7, Kokkos::ALL, Kokkos::ALL); + + resultEnergies(iK, m) = E(m); + + for (int n=0; n