From 091a7f5c97bad0364e3be76ed0832f74fe7c02e0 Mon Sep 17 00:00:00 2001
From: Michele Simoncelli <michele.simoncelli@gmail.com>
Date: Mon, 19 Aug 2024 18:13:00 +0100
Subject: [PATCH 1/4] Fix implementation of Wigner thermal conductivity: -
 employ the smooth (Wallace) phase, which ensures size consistency (see
 https://link.aps.org/doi/10.1103/PhysRevX.12.041011 , in particular Sec VI);
 - consider the Wigner velocity operator also at q=0, a condition that is
 crucial to have a size-consistent formulation and describe the
 disordered-solid limit (see
 https://www.nature.com/articles/s41524-023-01033-4 ) .

The current implementation is limited to the RTA level and outputs only the total Wigner conductivity, without decomposing it into population and coherences contributions. The decomposition between populations and coherences will be implemented in the future, along with the possibility to output the Wigner conductivity also with the full exact solution of the BTE. The code gives results compatible with phono3py.
---
 src/harmonic/phonon_h0.cpp                    | 130 ++++++------------
 src/observable/wigner_phonon_thermal_cond.cpp |  12 +-
 2 files changed, 53 insertions(+), 89 deletions(-)

diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp
index 45e50b27..355ed986 100644
--- a/src/harmonic/phonon_h0.cpp
+++ b/src/harmonic/phonon_h0.cpp
@@ -910,17 +910,30 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
   auto energies = std::get<0>(tup);
   auto eigenvectors = std::get<1>(tup);
 
+  Eigen::MatrixXcd eigenvectors_bar(numBands, numBands);
+  Eigen::MatrixXcd eigenvectors_bar_minus(numBands, numBands);
+  Eigen::MatrixXcd eigenvectors_bar_plus(numBands, numBands);
+  eigenvectors_bar.setZero();
+  eigenvectors_bar_minus.setZero();
+  eigenvectors_bar_plus.setZero();
+
+  Eigen::VectorXcd matrix_U(numBands);
+  Eigen::VectorXcd matrix_U_plus(numBands);
+  Eigen::VectorXcd matrix_U_minus(numBands);
+  matrix_U.setZero();
+  matrix_U_plus.setZero();
+  matrix_U_minus.setZero();
+
   //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", energies(i));
   //for(int i = 0; i < numBands; i++)
   //  for(int j = 0; j < numBands; j++)
   //    printf("old = %.16e %.16e\n", eigenvectors(i,j).real(), eigenvectors(i,j).imag());
 
-  // now we compute the velocity operator, diagonalizing the expectation
-  // value of the derivative of the dynamical matrix.
-  // This works better than doing finite differences on the frequencies.
+  // now we compute the velocity operator
   double deltaQ = 1.0e-8;
   for (int i : {0, 1, 2}) {
     // define q+ and q- from finite differences.
+    Eigen::Vector3d qcenter = coordinates;
     Eigen::Vector3d qPlus = coordinates;
     Eigen::Vector3d qMinus = coordinates;
     qPlus(i) += deltaQ;
@@ -940,11 +953,28 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enPlus(i));
     //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enMinus(i));
     //for(int i = 0; i < numBands; i++)
-    //  for(int j = 0; j < numBands; j++)
-    //    printf("old = %.16e %.16e\n", eigPlus(i,j).real(), eigPlus(i,j).imag());
-    //for(int i = 0; i < numBands; i++)
-    //  for(int j = 0; j < numBands; j++)
-    //    printf("old = %.16e %.16e\n", eigMinus(i,j).real(), eigMinus(i,j).imag());
+
+
+    for (int iat = 0; iat < numAtoms; iat++){
+      for (int i : {0, 1, 2}) {
+        auto ip= i + iat*3;
+        double arg= qcenter.dot(atomicPositions.row(iat));
+        matrix_U(ip)=exp(-complexI*arg);
+        arg= qPlus.dot(atomicPositions.row(iat));
+        matrix_U_plus(ip)=exp(-complexI*arg);
+        arg= qMinus.dot(atomicPositions.row(iat));
+        matrix_U_minus(ip)=exp(-complexI*arg);
+      }
+    }
+
+    for(int i = 0; i < numBands; i++){
+      for(int j = 0; j < numBands; j++){
+        eigenvectors_bar(i,j)=matrix_U(i)*eigenvectors(i,j);
+        eigenvectors_bar_plus(i,j)=matrix_U_plus(i)*eigPlus(i,j);
+        eigenvectors_bar_minus(i,j)=matrix_U_minus(i)*eigMinus(i,j);
+      }
+    }
+
 
     // build diagonal matrices with frequencies
     Eigen::MatrixXd enPlusMat(numBands, numBands);
@@ -957,9 +987,9 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     // build the dynamical matrix at the two wavevectors
     // since we diagonalized it before, A = M.U.M*
     Eigen::MatrixXcd sqrtDPlus(numBands, numBands);
-    sqrtDPlus = eigPlus * enPlusMat * eigPlus.adjoint();
+    sqrtDPlus = eigenvectors_bar_plus * enPlusMat * eigenvectors_bar_plus.adjoint();
     Eigen::MatrixXcd sqrtDMinus(numBands, numBands);
-    sqrtDMinus = eigMinus * enMinusMat * eigMinus.adjoint();
+    sqrtDMinus = eigenvectors_bar_minus * enMinusMat * eigenvectors_bar_minus.adjoint();
 
     // now we can build the velocity operator
     Eigen::MatrixXcd der(numBands, numBands);
@@ -984,89 +1014,19 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     //  }
     //}
 
-    // and to be safe, we reimpose hermiticity
-    der = 0.5 * (der + der.adjoint());
-
-    // now we rotate in the basis of the eigenvectors at q.
-    der = eigenvectors.adjoint() * der * eigenvectors;
+    // option below can probably be decommented, this will enforce hermiticity numerically, 
+    // results should be equivalent within numerical noise
+    //der = 0.5 * (der + der.adjoint()); 
+    der = eigenvectors_bar.adjoint() * der * eigenvectors_bar;
 
     for (int ib2 = 0; ib2 < numBands; ib2++) {
       for (int ib1 = 0; ib1 < numBands; ib1++) {
         velocity(ib1, ib2, i) = der(ib1, ib2);
-        //auto x = velocity(ib1,ib2,i);
-        //printf("old = %.16e %.16e\n", x.real(), x.imag());
-      }
-    }
-  }
-
-  // turns out that the above algorithm has problems with degenerate bands
-  // so, we diagonalize the velocity operator in the degenerate subspace,
-
-  for (int ib = 0; ib < numBands; ib++) {
-    // first, we check if the band is degenerate, and the size of the
-    // degenerate subspace
-    int sizeSubspace = 1;
-    for (int ib2 = ib + 1; ib2 < numBands; ib2++) {
-      // I consider bands degenerate if their frequencies are the same
-      // within 0.0001 cm^-1
-      if (abs(energies(ib) - energies(ib2)) > 0.0001 / ryToCmm1) {
-        break;
-      }
-      sizeSubspace += 1;
-    }
-
-    if (sizeSubspace > 1) {
-      Eigen::MatrixXcd subMat(sizeSubspace, sizeSubspace);
-      // we have to repeat for every direction
-      for (int iCart : {0, 1, 2}) {
-        // take the velocity matrix of the degenerate subspace
-        for (int j = 0; j < sizeSubspace; j++) {
-          for (int i = 0; i < sizeSubspace; i++) {
-            subMat(i, j) = velocity(ib + i, ib + j, iCart);
-          }
-        }
-
-        // reinforce hermiticity
-        subMat = 0.5 * (subMat + subMat.adjoint());
-
-        // diagonalize the subMatrix
-        Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigenSolver(subMat);
-        Eigen::MatrixXcd newEigenVectors = eigenSolver.eigenvectors();
-        //newEigenVectors = 3*subMat; // TODO: undo
-
-        // rotate the original matrix in the new basis
-        // that diagonalizes the subspace.
-        subMat = newEigenVectors.adjoint() * subMat * newEigenVectors;
-
-        // reinforce hermiticity
-        subMat = 0.5 * (subMat + subMat.adjoint());
-
-        // substitute back
-        for (int j = 0; j < sizeSubspace; j++) {
-          for (int i = 0; i < sizeSubspace; i++) {
-            velocity(ib + i, ib + j, iCart) = subMat(i, j);
-          }
-        }
       }
     }
-
-    // we skip the bands in the subspace, since we corrected them already
-    ib += sizeSubspace - 1;
-  }
-  // if we are working at gamma, we set all velocities to zero.
-  if (coordinates.norm() < 1.0e-6) {
-    velocity.setZero();
   }
-  //for(int i = 0; i < 3; i++){
-  //  for (int ib2 = 0; ib2 < numBands; ib2++) {
-  //    for (int ib1 = 0; ib1 < numBands; ib1++) {
-  //      auto x = velocity(ib1,ib2,i);
-  //      printf("old = %.16e %.16e\n", x.real(), x.imag());
-  //    }
-  //  }
-  //}
-  Kokkos::Profiling::popRegion();
   return velocity;
+  Kokkos::Profiling::popRegion();
 }
 
 Eigen::Vector3i PhononH0::getCoarseGrid() { return qCoarseGrid; }
diff --git a/src/observable/wigner_phonon_thermal_cond.cpp b/src/observable/wigner_phonon_thermal_cond.cpp
index c2badcdc..2d5d9f13 100644
--- a/src/observable/wigner_phonon_thermal_cond.cpp
+++ b/src/observable/wigner_phonon_thermal_cond.cpp
@@ -83,10 +83,6 @@ WignerPhononThermalConductivity::WignerPhononThermalConductivity(
       // calculate wigner correction
       for (int ib1 = 0; ib1 < numBands; ib1++) {
         for (int ib2 = 0; ib2 < numBands; ib2++) {
-          if (ib1 == ib2) {
-            continue;
-          }
-
           int is1 = bandStructure.getIndex(iqIdx, BandIndex(ib1));
           int is2 = bandStructure.getIndex(iqIdx, BandIndex(ib2));
           auto is1Idx = StateIndex(is1);
@@ -101,6 +97,9 @@ WignerPhononThermalConductivity::WignerPhononThermalConductivity(
                 double num =
                     energies(ib1) * bose(iCalc, ib1) * (bose(iCalc, ib1) + 1.) +
                     energies(ib2) * bose(iCalc, ib2) * (bose(iCalc, ib2) + 1.);
+
+                if ( (bose(iCalc, ib1) <= 0.) || (bose(iCalc, ib2) <=0.0) ) continue; //this avoids numerical problems for the acoustic phonons
+
                 double vel =
                     (velRot(ib1, ib2, ic1) * velRot(ib2, ib1, ic2)).real();
                 double den = 4. * pow(energies(ib1) - energies(ib2), 2) +
@@ -142,6 +141,9 @@ WignerPhononThermalConductivity &WignerPhononThermalConductivity::operator=(
 
 void WignerPhononThermalConductivity::calcFromPopulation(VectorBTE &n) {
   PhononThermalConductivity::calcFromPopulation(n);
+  // TO DO: here we use setZero because we include the diagonal elements when computing the Wigner conductivity
+  // in other words, the Wigner conductivity includes both populations and coherences contributions
+  tensordxd.setZero();
   tensordxd += wignerCorrection;
 }
 
@@ -149,6 +151,7 @@ void WignerPhononThermalConductivity::calcVariational(VectorBTE &af,
                                                       VectorBTE &f,
                                                       VectorBTE &scalingCG) {
   PhononThermalConductivity::calcVariational(af, f, scalingCG);
+  Error("Developer error: Wigner conductivity is currently limited to RTA approximation, implementation with iterative will be done in the future");
   tensordxd += wignerCorrection;
 }
 
@@ -159,6 +162,7 @@ void WignerPhononThermalConductivity::calcFromRelaxons(
   PhononThermalConductivity::calcFromRelaxons(context, statisticsSweep,
                                               eigenvectors,
                                               scatteringMatrix, eigenvalues);
+  Error("Developer error: Wigner conductivity is currently limited to RTA approximation, implementation with iterative will be done in the future");         
   tensordxd += wignerCorrection;
 }
 

From 56df4d63498c75d8feb3e9bc658e5c78b4343ef5 Mon Sep 17 00:00:00 2001
From: jcoulter12 <jcoulter@flatironinstitute.org>
Date: Fri, 23 Aug 2024 13:34:26 -0400
Subject: [PATCH 2/4] fix placement of kokkos profiling region to before return
 statement [ci skip]

---
 src/harmonic/phonon_h0.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp
index 355ed986..73c65d7a 100644
--- a/src/harmonic/phonon_h0.cpp
+++ b/src/harmonic/phonon_h0.cpp
@@ -1025,8 +1025,8 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
       }
     }
   }
-  return velocity;
   Kokkos::Profiling::popRegion();
+  return velocity;
 }
 
 Eigen::Vector3i PhononH0::getCoarseGrid() { return qCoarseGrid; }

From 56ff4656b85831df12eaeac58c8c9f2a8c7c2d8f Mon Sep 17 00:00:00 2001
From: Jenny Coulter <jenny.b.coulter@gmail.com>
Date: Thu, 5 Sep 2024 11:49:01 -0400
Subject: [PATCH 3/4] [ci skip] push a draft of the kokkos version of wallace
 phase implementation, remove many old unused print statements

---
 src/harmonic/phonon_h0.cpp        |  61 ++++-----------
 src/harmonic/phonon_h0_kokkos.cpp | 126 ++++++++++++++++++++----------
 2 files changed, 99 insertions(+), 88 deletions(-)

diff --git a/src/harmonic/phonon_h0.cpp b/src/harmonic/phonon_h0.cpp
index 73c65d7a..da19af77 100644
--- a/src/harmonic/phonon_h0.cpp
+++ b/src/harmonic/phonon_h0.cpp
@@ -892,6 +892,8 @@ PhononH0::diagonalizeVelocity(Point &point) {
   return diagonalizeVelocityFromCoordinates(coordinates);
 }
 
+// TODO I think it would be easier to 
+// apply these phases to the dynmat during the transform
 Eigen::Tensor<std::complex<double>, 3>
 PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
 
@@ -901,9 +903,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
   velocity.setZero();
 
   bool withMassScaling = false;
-    //for(int i = 0; i < 3; i++){
-    //  printf("old = %.16e\n", coordinates(i));
-    //}
 
   // get the eigenvectors and the energies of the q-point
   auto tup = diagonalizeFromCoordinates(coordinates, withMassScaling);
@@ -924,14 +923,11 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
   matrix_U_plus.setZero();
   matrix_U_minus.setZero();
 
-  //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", energies(i));
-  //for(int i = 0; i < numBands; i++)
-  //  for(int j = 0; j < numBands; j++)
-  //    printf("old = %.16e %.16e\n", eigenvectors(i,j).real(), eigenvectors(i,j).imag());
-
   // now we compute the velocity operator
   double deltaQ = 1.0e-8;
+  // kx, ky, kz directions 
   for (int i : {0, 1, 2}) {
+
     // define q+ and q- from finite differences.
     Eigen::Vector3d qcenter = coordinates;
     Eigen::Vector3d qPlus = coordinates;
@@ -939,10 +935,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     qPlus(i) += deltaQ;
     qMinus(i) -= deltaQ;
 
-    //for(int i = 0; i < 3; i++){
-    //  printf("old = %.16e %.16e\n", qPlus(i), qMinus(i));
-    //}
-
     // diagonalize the dynamical matrix at q+ and q-
     auto tup2 = diagonalizeFromCoordinates(qPlus, withMassScaling);
     auto enPlus = std::get<0>(tup2);
@@ -950,32 +942,28 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     auto tup1 = diagonalizeFromCoordinates(qMinus, withMassScaling);
     auto enMinus = std::get<0>(tup1);
     auto eigMinus = std::get<1>(tup1);
-    //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enPlus(i));
-    //for(int i = 0; i < numBands; i++) printf("old = %.16e\n", enMinus(i));
-    //for(int i = 0; i < numBands; i++)
-
 
+    // apply the Wallace phase 
     for (int iat = 0; iat < numAtoms; iat++){
       for (int i : {0, 1, 2}) {
-        auto ip= i + iat*3;
-        double arg= qcenter.dot(atomicPositions.row(iat));
-        matrix_U(ip)=exp(-complexI*arg);
-        arg= qPlus.dot(atomicPositions.row(iat));
-        matrix_U_plus(ip)=exp(-complexI*arg);
-        arg= qMinus.dot(atomicPositions.row(iat));
-        matrix_U_minus(ip)=exp(-complexI*arg);
+        auto ip = i + iat * 3;
+        double arg = qcenter.dot(atomicPositions.row(iat));
+        matrix_U(ip) = exp(-complexI*arg);
+        arg = qPlus.dot(atomicPositions.row(iat));
+        matrix_U_plus(ip) = exp(-complexI*arg);
+        arg = qMinus.dot(atomicPositions.row(iat));
+        matrix_U_minus(ip) = exp(-complexI*arg);
       }
     }
 
     for(int i = 0; i < numBands; i++){
       for(int j = 0; j < numBands; j++){
-        eigenvectors_bar(i,j)=matrix_U(i)*eigenvectors(i,j);
-        eigenvectors_bar_plus(i,j)=matrix_U_plus(i)*eigPlus(i,j);
-        eigenvectors_bar_minus(i,j)=matrix_U_minus(i)*eigMinus(i,j);
+        eigenvectors_bar(i,j) = matrix_U(i) * eigenvectors(i,j);
+        eigenvectors_bar_plus(i,j) = matrix_U_plus(i) * eigPlus(i,j);
+        eigenvectors_bar_minus(i,j) = matrix_U_minus(i) * eigMinus(i,j);
       }
     }
 
-
     // build diagonal matrices with frequencies
     Eigen::MatrixXd enPlusMat(numBands, numBands);
     Eigen::MatrixXd enMinusMat(numBands, numBands);
@@ -995,25 +983,6 @@ PhononH0::diagonalizeVelocityFromCoordinates(Eigen::Vector3d &coordinates) {
     Eigen::MatrixXcd der(numBands, numBands);
     der = (sqrtDPlus - sqrtDMinus) / (2. * deltaQ);
 
-    //for(int i = 0; i < numBands; i++){
-    //  for(int j = 0; j < numBands; j++){
-    //    auto x = der(i,j);
-    //    printf("old = %.16e %.16e\n", x.real(), x.imag());
-    //  }
-    //}
-    //for(int i = 0; i < numBands; i++){
-    //  for(int j = 0; j < numBands; j++){
-    //    auto x = sqrtDPlus(i,j);
-    //    printf("old = %.16e %.16e\n", x.real(), x.imag());
-    //  }
-    //}
-    //for(int i = 0; i < numBands; i++){
-    //  for(int j = 0; j < numBands; j++){
-    //    auto x = sqrtDMinus(i,j);
-    //    printf("old = %.16e %.16e\n", x.real(), x.imag());
-    //  }
-    //}
-
     // option below can probably be decommented, this will enforce hermiticity numerically, 
     // results should be equivalent within numerical noise
     //der = 0.5 * (der + der.adjoint()); 
diff --git a/src/harmonic/phonon_h0_kokkos.cpp b/src/harmonic/phonon_h0_kokkos.cpp
index c697af05..5cf5592c 100644
--- a/src/harmonic/phonon_h0_kokkos.cpp
+++ b/src/harmonic/phonon_h0_kokkos.cpp
@@ -45,7 +45,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian(
         dynamicalMatrices(iK, m, n) = tmp;
       });
   Kokkos::realloc(phases_d, 0, 0);
-  //print3DComplex("new = ", dynamicalMatrices);
 
   if (hasDielectric) {
     auto longRangeCorrection1_d = this->longRangeCorrection1_d;
@@ -137,7 +136,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian(
             }
           });
         });
-  //print3DComplex("new = ", dynamicalMatrices);
   }
 
 
@@ -167,7 +165,6 @@ StridedComplexView3D PhononH0::kokkosBatchedBuildBlochHamiltonian(
         DD(iK, m, n) /= sqrt(atomicMasses_d(m) * atomicMasses_d(n));
       });
 
-  //print3DComplex("new = ", DD);
   return DD;
 }
 
@@ -180,7 +177,6 @@ std::tuple<DoubleView2D, StridedComplexView3D> PhononH0::kokkosBatchedDiagonaliz
   // create the Hamiltonians
   StridedComplexView3D dynamicalMatrices =
       kokkosBatchedBuildBlochHamiltonian(cartesianCoordinates);
-  //print3DComplex("new = ", dynamicalMatrices);
 
   int numK = dynamicalMatrices.extent(0);
   int numBands = this->numBands;
@@ -199,8 +195,6 @@ std::tuple<DoubleView2D, StridedComplexView3D> PhononH0::kokkosBatchedDiagonaliz
           frequencies(iK, m) = -sqrt(-frequencies(iK, m));
         }
       });
-  //print2D("new = ", frequencies);
-  //print3DComplex("new = ", dynamicalMatrices);
 
   if(withMassScaling) kokkosBatchedScaleEigenvectors(dynamicalMatrices);
   return std::make_tuple(frequencies, dynamicalMatrices);
@@ -386,15 +380,19 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities(
   int numK = cartesianCoordinates.extent(0);
 
   double delta = 1.0e-8;
+  Kokkos::complex<double> complexI(0.0, 1.0);
 
   // prepare all the wavevectors at which we need the hamiltonian,
   // (kx, ky, kz), (kx+dk, ky, kz), (kx - dk, ky, kz), (kx, ky + dk, kz), etc.
+  // 7 here is for the seven possible k+/- values
   DoubleView2D allVectors("wavevectors_k", numK * 7, 3);
   Kokkos::parallel_for(
-      "elHamiltonianShifted_d", numK, KOKKOS_LAMBDA(int iK) {
+      "phHamiltonianShifted_d", numK, KOKKOS_LAMBDA(int iK) {
+        // central points, i = x,y,z
         for (int i = 0; i < 3; ++i) {
           allVectors(iK * 7, i) = cartesianCoordinates(iK, i);
         }
+        // iDir for each kx, ky, kz +/-, i = x,y,z
         for (int iDir = 0; iDir < 3; ++iDir) {
           for (int i = 0; i < 3; ++i) {
             allVectors(iK * 7 + iDir * 2 + 1, i) = cartesianCoordinates(iK, i);
@@ -407,99 +405,133 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities(
         }
       });
 
-  //print2D("new = ", allVectors);
-
   // compute the electronic properties at all wavevectors
   auto t = kokkosBatchedDiagonalizeFromCoordinates(allVectors, false);
   DoubleView2D allEnergies = std::get<0>(t);
   StridedComplexView3D allEigenvectors = std::get<1>(t);
 
-    //print2D("new = ", allEnergies);
-
   int numBands = allEnergies.extent(1);
 
-  //print2D("new = ", allEnergies);
-  //print3DComplex("new = ", allEigenvectors);
-
   Kokkos::LayoutStride eigenvectorLayout(
       numK, numBands*numBands,
       numBands, 1,
       numBands, numBands);
 
-  // save energies and eigenvectors to results
-  DoubleView2D resultEnergies("energies", numK, numBands);
   StridedComplexView3D resultEigenvectors("eigenvectors", eigenvectorLayout);
   ComplexView4D resultVelocities("velocities", numK, numBands, numBands, 3);
+  ComplexView2D phases("wallacePhases", numK, numBands);
 
+  // copy slice of eigenvectors, energies into views 
+  Kokkos::parallel_for(
+      "phase", Range3D({0, 0, 0}, {numK, numAtoms, 3}), KOKKOS_LAMBDA(int iK, int iat, int i) {
+        // prepare the Wallace phase 
+          int m = i + iat * 3;  // generate the band index 
+          auto R = atomicPositions.row(iat); // TODO need atomic positions as a view... 
+
+          // i for each kx, ky, kz +/-, j = x,y,z
+          double arg = 0.0;
+          for(int j = 0; j < 3; ++j) {
+            arg += R(j) * allVectors(iK * 7 + i * 2 + 1, j); 
+          }
+          phases(iK,m) = exp( -complexI * arg );
+        });
+  // apply phases -- here we use the results eigenvectors container 
+  // to hold phase * U(k), to be replaced with standard ones before return 
   Kokkos::parallel_for(
       "der", Range2D({0, 0}, {numK, numBands}), KOKKOS_LAMBDA(int iK, int m) {
-        auto E = Kokkos::subview(allEnergies, iK * 7, Kokkos::ALL);
+
         auto X = Kokkos::subview(allEigenvectors, iK * 7, Kokkos::ALL, Kokkos::ALL);
-        resultEnergies(iK, m) = E(m);
+
+        // copy resultEigenvectors to return at the end
         for (int n=0; n<numBands; ++n) {
-          resultEigenvectors(iK, m, n) = X(m, n);
+          resultEigenvectors(iK, m, n) = phases(iK, m) * X(m, n);
         }
       });
 
-  //print3DComplex("new = ", resultEigenvectors);
+  Kokkos::resize(phases, 0, 0); // deallocate
 
   // Views for intermediate results
   ComplexView3D der("der", numK, numBands, numBands);
   ComplexView3D tmpV("tmpV", numK, numBands, numBands);
-  //ComplexView3D plus("plus", numK, numBands, numBands);
-  //ComplexView3D minus("plus", numK, numBands, numBands);
 
+  // kx, ky, kz directions 
   for (int i = 0; i < 3; ++i) {
 
+    // TODO will these go out of scope? 
+    ComplexView2D phasesPlus("wallacePhasesPlus", numK, numBands);
+    ComplexView2D phasesMinus("wallacePhasesMinus", numK, numBands);
+
+    // precalc +/- phases 
+    Kokkos::parallel_for(
+        "phasePlusMinus", Range3D({0, 0, 0}, {numK, numAtoms, 3}), KOKKOS_LAMBDA(int iK, int iat, int i) {
+          // prepare the Wallace phase 
+
+          int m = i + iat * 3;  // generate the band index 
+          auto R = atomicPositions.row(iat); // TODO need atomic positions as a view... 
+
+          // i for each kx, ky, kz +/-, j = x,y,z
+          double argPlus = 0.0;
+          double argMinus = 0.0;
+          for(int j = 0; j < 3; ++j) {
+            argPlus += R(j) * allVectors(iK * 7 + i * 2 + 1, j); 
+            argMinus += R(j) * allVectors(iK * 7 + i * 2 + 2, j); 
+          }
+          phasesPlus(iK,m) = exp( -complexI * argPlus );
+          phasesMinus(iK,m) = exp( -complexI * argMinus );
+        });
+
     // To build the velocity operator, we first compute the matrix
     // der = dH/dk = ( H(k+dk)-H(k-dk) ) / 2dk
     // we also reinforce the Hermiticity (better numerical properties)
     Kokkos::parallel_for(
         "der", Range3D({0, 0, 0}, {numK, numBands, numBands}),
         KOKKOS_LAMBDA(int iK, int m, int n) {
+
           auto XPlus = Kokkos::subview(allEigenvectors, iK * 7 + i * 2 + 1, Kokkos::ALL, Kokkos::ALL);
           auto XMins = Kokkos::subview(allEigenvectors, iK * 7 + i * 2 + 2, Kokkos::ALL, Kokkos::ALL);
+
           DoubleView1D EPlus = Kokkos::subview(allEnergies, iK * 7 + i * 2 + 1, Kokkos::ALL);
           DoubleView1D EMins = Kokkos::subview(allEnergies, iK * 7 + i * 2 + 2, Kokkos::ALL);
+
           Kokkos::complex<double> x(0.,0.);
-          //Kokkos::complex<double> p(0.,0.);
-          //Kokkos::complex<double> pm(0.,0.);
           for (int l=0; l<numBands; ++l) {
-            x += XPlus(m,l) * EPlus(l) * Kokkos::conj(XPlus(n,l))
-                - XMins(m,l) * EMins(l) * Kokkos::conj(XMins(n,l));
-            //p += XPlus(m,l) * EPlus(l) * Kokkos::conj(XPlus(n,l));
-            //pm += XMins(m,l) * EMins(l) * Kokkos::conj(XMins(n,l));
+            // TODO check that these dimensions are right for band indices 
+            x += phasesPlus(iK,m) * XPlus(m,l) * EPlus(l) * Kokkos::conj(phasesPlus(iK,n) * XPlus(n,l))
+                -  phasesMinus(iK,m) * XMins(m,l) * EMins(l) * Kokkos::conj(phasesMinus(iK,n) * XMins(n,l));
           }
           der(iK, m, n) = x/(2*delta);
-          //plus(iK,m,n) = p;
-          //minus(iK,m,n) = pm;
         });
     Kokkos::fence();
-    //print2D("new = ", resultEnergies);
-    //print3DComplex("new = ", der);
-    //print3DComplex("new = ", plus);
-    //print3DComplex("new = ", minus);
+
+    Kokkos::resize(phasesPlus, 0, 0); // deallocate
+    Kokkos::resize(phasesMinus, 0, 0); // deallocate
 
     // Now we complete the Hellman-Feynman theorem
     // and compute the velocity as v = U(k)^* der * U(k)
+
+    // first applying v1 = U(k)^* der
     Kokkos::parallel_for(
         "tmpV", Range3D({0, 0, 0}, {numK, numBands, numBands}),
         KOKKOS_LAMBDA(int iK, int m, int n) {
-          auto L = Kokkos::subview(resultEigenvectors, iK, Kokkos::ALL, Kokkos::ALL);
-          auto R = Kokkos::subview(der, iK, Kokkos::ALL, Kokkos::ALL);
-          auto A = Kokkos::subview(tmpV, iK, Kokkos::ALL, Kokkos::ALL);
+          auto U = Kokkos::subview(resultEigenvectors, iK, Kokkos::ALL, Kokkos::ALL);
+          auto dER = Kokkos::subview(der, iK, Kokkos::ALL, Kokkos::ALL);
+          auto tempV = Kokkos::subview(tmpV, iK, Kokkos::ALL, Kokkos::ALL);
           Kokkos::complex<double> tmp(0.,0.);
           for (int l = 0; l < numBands; ++l) {
-            tmp += Kokkos::conj(L(l,m)) * 0.5 * (R(l, n) + Kokkos::conj(R(n, l)));
+            // average (der^* + der / 2) to enforce Hermiticity 
+            //tmp += Kokkos::conj(L(l,m)) * 0.5 * (R(l, n) + Kokkos::conj(R(n, l)));
+            tmp += Kokkos::conj(U(l,m)) * dER(l, n);
           }
-          A(m, n) = tmp;
+          tempV(m, n) = tmp;
         });
     Kokkos::fence();
 
+    // then applying v_final = v1 * U(k)
     Kokkos::parallel_for(
         "vel", Range3D({0, 0, 0}, {numK, numBands, numBands}),
         KOKKOS_LAMBDA(int iK, int m, int n) {
           double norm = 0.;
+          // TODO why skip gamma here but not above?
           for (int i=0; i<3; ++i) {
             norm += cartesianCoordinates(iK,i) * cartesianCoordinates(iK,i);
           }
@@ -517,14 +549,24 @@ PhononH0::kokkosBatchedDiagonalizeWithVelocities(
   Kokkos::resize(der, 0, 0, 0);
   Kokkos::resize(tmpV, 0, 0, 0);
 
-  //print4DComplex("new = ", resultVelocities);
+   // copy resultEigenvectors and energies to return 
+  DoubleView2D resultEnergies("energies", numK, numBands);
+  Kokkos::parallel_for(
+      "copyEigenvectors", Range2D({0, 0}, {numK, numBands}), KOKKOS_LAMBDA(int iK, int m) {
+        auto E = Kokkos::subview(allEnergies, iK * 7, Kokkos::ALL);
+        auto X = Kokkos::subview(allEigenvectors, iK * 7, Kokkos::ALL, Kokkos::ALL);
+
+       resultEnergies(iK, m) = E(m);
+
+        for (int n=0; n<numBands; ++n) {
+          resultEigenvectors(iK, m, n) = X(m, n);
+        }
+  });
 
   // deal with velocity issues and degenerate bands
   kokkosBatchedTreatDegenerateVelocities(cartesianCoordinates, resultEnergies,
                                          resultVelocities, 0.0001 / ryToCmm1);
 
-  //print4DComplex("new = ", resultVelocities);
-
 // TODO: this is the only difference from the ElectronH0 kokkosPopulate()
 // can we unify the two?
   kokkosBatchedScaleEigenvectors(resultEigenvectors);

From b4885ec4476fca17fe47d63cc63119db493e562b Mon Sep 17 00:00:00 2001
From: jcoulter12 <jcoulter@flatironinstitute.org>
Date: Sat, 19 Oct 2024 09:54:53 -0400
Subject: [PATCH 4/4] add in missing prime in docs for el wigner correction

---
 doc/sphinx/source/theory/electronBTE.rst | 30 ++++++++++++------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/doc/sphinx/source/theory/electronBTE.rst b/doc/sphinx/source/theory/electronBTE.rst
index 37f92210..778459c8 100644
--- a/doc/sphinx/source/theory/electronBTE.rst
+++ b/doc/sphinx/source/theory/electronBTE.rst
@@ -47,13 +47,13 @@ with
    \times
    \bigg[
    (1-\bar{f}_{\boldsymbol{k}'b'} + \bar{n}_{\boldsymbol{q}\nu})
-   \delta(\epsilon_{\boldsymbol{k}b} - \epsilon_{\boldsymbol{k}'b'} - \hbar \omega_{\boldsymbol{q}\nu}) \\ 
+   \delta(\epsilon_{\boldsymbol{k}b} - \epsilon_{\boldsymbol{k}'b'} - \hbar \omega_{\boldsymbol{q}\nu}) \\
    &+
    (\bar{f}_{\boldsymbol{k}'b'} + \bar{n}_{\boldsymbol{q}\nu})
    \delta(\epsilon_{\boldsymbol{k}b} - \epsilon_{\boldsymbol{k}'b'} + \hbar \omega_{\boldsymbol{q}\nu})
    \bigg]
-   \delta(\boldsymbol{k}-\boldsymbol{k}'+\boldsymbol{q}). 
-   
+   \delta(\boldsymbol{k}-\boldsymbol{k}'+\boldsymbol{q}).
+
 This scattering matrix requires us to know the phonon and electron energies, as well as the electron-phonon coupling on a fine (interpolated) mesh.
 
 Please note that, for convenience, here we use a coupling defined as
@@ -84,7 +84,7 @@ which results in the matrix with diagonal matrix elements:
    \Omega_{\lambda \lambda} = \frac{1}{\tau_{\boldsymbol{k}b}}
 
 and for the off-diagonal terms:
-   
+
 .. math::
    \tilde{\Omega}_{\boldsymbol{k}b,\boldsymbol{k}'b'} =&
    -
@@ -102,11 +102,11 @@ This matrix is symmetric and has a number of interesting physical properties (e.
 Computationally, the symmetric matrix can be used in a conjugate gradient method that maximises the electrical and thermal conductivity, and guarantees the existence of eigenvalues.
 
 In Phoebe, instead of solving the original BTE problem in the form :math:`\sum_{\lambda'} \Omega_{\lambda,\lambda'} \delta f_{\lambda'} = b_{\lambda}`, we solve the symmetrized problem:
-   
+
 .. math::
    \sum_{\lambda'} \tilde{\Omega}_{\lambda,\lambda'} \delta \tilde{f}_{\lambda'} = \tilde{b}_{\lambda}
 
-with 
+with
 
 .. math::
    \delta f_{\lambda} = ( \bar{f}_{\lambda} (1-\bar{f}_{\lambda}) )^{-\frac{1}{2}} \delta f_{\lambda}
@@ -117,12 +117,12 @@ and
    \tilde{b}_{\lambda} = ( \bar{f}_{\lambda} (1-\bar{f}_{\lambda}) )^{-\frac{1}{2}} b_{\lambda}
 
 
-   
+
 
 Onsager coefficients
 --------------------
 
-In the electronic case, there are a handful of transport coefficients we'd like to solve the BTE to find, such as the electrical conductivity, :math:`\sigma`, the electronic part of the thermal conductivity, :math:`\kappa_e`, and the Seebeck coeffieint, :math:`S`. These quantities are defined in terms of the Onsager coefficients. 
+In the electronic case, there are a handful of transport coefficients we'd like to solve the BTE to find, such as the electrical conductivity, :math:`\sigma`, the electronic part of the thermal conductivity, :math:`\kappa_e`, and the Seebeck coeffieint, :math:`S`. These quantities are defined in terms of the Onsager coefficients.
 
 We assume that the response to the applied electric field and thermal gradient is linear in these external fields:
 
@@ -142,7 +142,7 @@ and
 
 where :math:`g_s` is the spin degeneracy.
 
-We can decompose these to write, 
+We can decompose these to write,
 
 .. math::
    \boldsymbol{J} = L_{EE} \boldsymbol{E} + L_{ET} \boldsymbol{\nabla} T
@@ -171,7 +171,7 @@ where :math:`d` is the carriers' doping concentration.
 Solutions of the electron BTE
 --------------------------------------
 
-Largely, these solvers follow the equivalent section in the phonon BTE section, where they may be described in more detail. For further details and references on any specific solver, we suggest you visit the equivalent phonon sections, as well. Here, we again establish methods of finding the solution vector to the BTE, :math:`f`, but in this case, we have two: :math:`f^T` and :math:`f^E`, for each field. 
+Largely, these solvers follow the equivalent section in the phonon BTE section, where they may be described in more detail. For further details and references on any specific solver, we suggest you visit the equivalent phonon sections, as well. Here, we again establish methods of finding the solution vector to the BTE, :math:`f`, but in this case, we have two: :math:`f^T` and :math:`f^E`, for each field.
 
 
 RTA Solution
@@ -205,7 +205,7 @@ Iterative solution: Omini-Sparavigna method
 .. note::
    Generally, we recommend the variational method over this.
 
-This is an adaptation of the Omini-Sparavigna method to electrons. To better understand this method, please have a look first at the counterpart phonon section. 
+This is an adaptation of the Omini-Sparavigna method to electrons. To better understand this method, please have a look first at the counterpart phonon section.
 
 In short, the electron BTE consists in two linear algebra problems:
 
@@ -347,23 +347,23 @@ The related transport coefficients are defined as:
 
 .. math::
    L_{EE}^{ij} =
-   \frac{e g_s}{V N_k} \sum_{\boldsymbol{k}b} \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{E_j}(\boldsymbol{k}) \Big\}_{bb}
+   \frac{e g_s}{V N_k} \sum_{\boldsymbol{k}b} \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{E_j}(\boldsymbol{k}) \Big\}_{bb'}
 
 .. math::
    L_{ET}^{ij} =
-   \frac{e g_s}{V N_k} \sum_{\boldsymbol{k}b} \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{T_j}(\boldsymbol{k}) \Big\}_{bb}
+   \frac{e g_s}{V N_k} \sum_{\boldsymbol{k}b} \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{T_j}(\boldsymbol{k}) \Big\}_{bb'}
 
 .. math::
    L_{TE}^{ij} =
    \frac{g_s}{V N_k}
    \sum_{\boldsymbol{k}b}
    \big( \epsilon_{b}(\boldsymbol{k})-\mu \big)
-   \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{E_j}(\boldsymbol{k}) \Big\}_{bb}
+   \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{E_j}(\boldsymbol{k}) \Big\}_{bb'}
 
 .. math::
    L_{TT}^{ij} =
    \frac{g_s}{V N_k}
    \sum_{\boldsymbol{k}b}
    \big( \epsilon_{b}(\boldsymbol{k})-\mu \big)
-   \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{T_j}(\boldsymbol{k}) \Big\} _{bb}
+   \frac{1}{2} \Big\{ v^i(\boldsymbol{k}) , f^{T_j}(\boldsymbol{k}) \Big\} _{bb'}