diff --git a/PW/src/stres_gradcorr.f90 b/PW/src/stres_gradcorr.f90 index 3f96951c5a..219072a8e1 100644 --- a/PW/src/stres_gradcorr.f90 +++ b/PW/src/stres_gradcorr.f90 @@ -33,7 +33,7 @@ SUBROUTINE stres_gradcorr( rho, rho_core, rhog_core, nspin, domag, & ! ! ... local variables ! - INTEGER :: k, l, m, ipol, ir, ig, is, nspin0, np + INTEGER :: k, l, m, ipol, ir, ig, is, nspin0, np, ispin INTEGER :: nr1, nr2, nr3, nrxx, ngm REAL(DP), ALLOCATABLE :: grho(:,:,:), grho2(:,:), rhoaux(:,:), & segni(:), taue2(:,:) @@ -45,14 +45,13 @@ SUBROUTINE stres_gradcorr( rho, rho_core, rhog_core, nspin, domag, & ! REAL(DP), PARAMETER :: epsr = 1.0d-6, epsg = 1.0d-10, e2 = 2.d0 REAL(DP) :: v2xc, v2xc_uu, v2xc_dd + REAL(DP) :: g1, g2, g3, h1, h2, h3 REAL(DP) :: sigma_gc11, sigma_gc31, sigma_gc21, & sigma_gc32, sigma_gc22, sigma_gc33 REAL(DP) :: sigma_gradcorr(3,3) ! IF ( .NOT. xclib_dft_is('gradient') .AND. .NOT. xclib_dft_is('meta') ) RETURN ! - IF ( xclib_dft_is('meta') .AND. nspin>1 ) CALL errore( 'stres_gradcorr', & - 'Meta-GGA stress does not work with spin polarization', 1 ) ! !$acc data present_or_copyin( rho, rho%of_r, rho%of_g, rho_core, rhog_core, g ) ! @@ -181,7 +180,7 @@ SUBROUTINE stres_gradcorr( rho, rho_core, rhog_core, nspin, domag, & sigma_gc33 = sigma_gc33 + grho(3,k,1)*grho(3,k,1) * v2xc ENDDO ! - ELSEIF (nspin0 == 2) THEN + ELSE IF (nspin0 == 2) THEN ! ! Spin-polarized case ! @@ -189,31 +188,51 @@ SUBROUTINE stres_gradcorr( rho, rho_core, rhog_core, nspin, domag, & DO k = 1, nrxx grho2(k,1) = grho(1,k,1)**2 + grho(2,k,1)**2 + grho(3,k,1)**2 grho2(k,2) = grho(1,k,2)**2 + grho(2,k,2)**2 + grho(3,k,2)**2 - ENDDO + END DO ! IF ( xclib_dft_is('meta') ) THEN ! !$acc data present_or_copyin(rho%kin_r) create( taue2, v2cm, v3x, v3c ) !$acc parallel loop DO k = 1, nrxx - taue2(k,1:nspin0) = rho%kin_r(k,1:nspin0) / e2 + DO ispin = 1, nspin0 + taue2(k,ispin) = rho%kin_r(k,ispin) / e2 + 0.5_DP * tau_core(k) + END DO ENDDO CALL xc_metagcx( nrxx, nspin0, np, rhoaux, grho, taue2, sx, sc, & v1x, v2x, v3x, v1c, v2cm, v3c, gpu_args_=.TRUE. ) - !$acc parallel loop - DO k = 1, nrxx - v2c(k,:) = v2cm(1,k,:) - ENDDO + ! + ! ... sigma_gc_{alpha,beta} = sum_sigma (v2x*grad_rho(alpha,sigma) + ! ... + v2cm(alpha,sigma)) * grad_rho(beta,sigma), with v2cm(alpha,sigma) + ! ... = d Ec/d(grad_rho(sigma))_alpha already including the chain rule + ! ... over all sigma invariants (uu, ud, dd) from xc_metagcx. + ! + DO ispin = 1, nspin0 + !$acc parallel loop reduction(+:sigma_gc11,sigma_gc21,sigma_gc22, & + !$acc& sigma_gc31,sigma_gc32,sigma_gc33) & + !$acc& private(g1,g2,g3,h1,h2,h3) + DO k = 1, nrxx + g1 = grho(1,k,ispin) + g2 = grho(2,k,ispin) + g3 = grho(3,k,ispin) + h1 = (v2x(k,ispin)*g1 + v2cm(1,k,ispin)) * e2 + h2 = (v2x(k,ispin)*g2 + v2cm(2,k,ispin)) * e2 + h3 = (v2x(k,ispin)*g3 + v2cm(3,k,ispin)) * e2 + sigma_gc11 = sigma_gc11 + h1 * g1 + sigma_gc21 = sigma_gc21 + h2 * g1 + sigma_gc22 = sigma_gc22 + h2 * g2 + sigma_gc31 = sigma_gc31 + h3 * g1 + sigma_gc32 = sigma_gc32 + h3 * g2 + sigma_gc33 = sigma_gc33 + h3 * g3 + END DO + END DO !$acc end data - ! FIXME : what are we supposed to do now? ! ELSE ! ALLOCATE( v2c_ud(nrxx) ) !$acc data create( v2c_ud ) - ! CALL xc_gcx( nrxx, nspin0, rhoaux, grho, sx, sc, v1x, v2x, v1c, v2c, v2c_ud, gpu_args_=.TRUE. ) - ! !$acc parallel loop reduction(+:sigma_gc11,sigma_gc21,sigma_gc22, & !$acc& sigma_gc31,sigma_gc32,sigma_gc33) DO k = 1, nrxx @@ -245,14 +264,13 @@ SUBROUTINE stres_gradcorr( rho, rho_core, rhog_core, nspin, domag, & grho(3,k,2)*grho(3,k,2) * v2xc_dd + & (grho(3,k,1)*grho(3,k,2) + & grho(3,k,2)*grho(3,k,1)) * v2c_ud(k) * e2 - ENDDO - ! + END DO !$acc end data DEALLOCATE( v2c_ud ) ! - ENDIF + END IF ! - ENDIF + END IF ! sigma_gradcorr(1,1) = sigma_gc11 sigma_gradcorr(2,1) = sigma_gc21 diff --git a/PW/src/stres_mgga.f90 b/PW/src/stres_mgga.f90 index e1e9dc512c..69464115db 100644 --- a/PW/src/stres_mgga.f90 +++ b/PW/src/stres_mgga.f90 @@ -35,7 +35,10 @@ SUBROUTINE stres_mgga( sigmaxc ) ! ! ... local variables ! - INTEGER :: ix, iy, K, ir, ipol, iss, incr, ibnd, ik, npw, dffts_nnr + INTEGER :: ix, iy, ir, ipol, iss, incr, ibnd, ik, npw, dffts_nnr + ! + INTEGER, PARAMETER :: ipol_ix(6) = [1, 2, 3, 2, 3, 3] + INTEGER, PARAMETER :: ipol_iy(6) = [1, 1, 1, 2, 2, 3] ! REAL(DP), PARAMETER :: epsr = 1.E-6_DP, epsg = 1.E-10_DP, e2 = 2._DP ! @@ -102,20 +105,15 @@ SUBROUTINE stres_mgga( sigmaxc ) ! CALL wfc_gradient( ibnd, ik, npw, gradwfc ) ! - ! ... Cross terms of kinetic energy density + ! ... Cross terms of kinetic energy density: only the 6 upper-diagonal + ! ... components ipol=1..6 are stored; the mapping ipol -> (ix,iy) is + ! ... held in the PARAMETER arrays ipol_ix, ipol_iy defined above. ! !$acc parallel loop collapse(2) DO ir = 1, dffts_nnr DO ipol = 1, 6 - ! - ! ... explanation here: https://stackoverflow.com/a/244550 - ! - ! M*(M+1)/ 2 - K = (3.0*4.0)/2.0 - 1 - (ipol - 1) - K = FLOOR((SQRT(FLOAT(8*K+1))-1)/2) - ix = (ipol-1) - (3.0*4.0)/2.0 + (K+1)*(K+2)/2.0 + 1 + (2-K) - iy = 3 - K - ! + ix = ipol_ix(ipol) + iy = ipol_iy(ipol) crosstaus(ir,ipol,current_spin) = crosstaus(ir,ipol,current_spin) + & 2.0_DP*w1*DBLE(gradwfc(ir,ix))*DBLE(gradwfc(ir,iy)) + & 2.0_DP*w2*AIMAG(gradwfc(ir,ix))*AIMAG(gradwfc(ir,iy)) diff --git a/PW/src/v_of_rho.f90 b/PW/src/v_of_rho.f90 index d631577c89..60ffc5aa13 100644 --- a/PW/src/v_of_rho.f90 +++ b/PW/src/v_of_rho.f90 @@ -252,7 +252,7 @@ SUBROUTINE v_xc_meta( rho, rho_core, rhog_core, tau_core, etxc, vtxc, v, kedtaur kedtaur(k,1) = (v3x(k,1)+v3c(k,1)) * 0.5d0 * e2 ! etxc = etxc + (ex(k)+ec(k)) * e2 - vtxc = vtxc + (v1x(k,1)+v1c(k,1)) * e2 * ABS(rho%of_r(k,1)) + vtxc = vtxc + (v1x(k,1)+v1c(k,1)) * e2 * ABS(rhotot(k,1)) ! IF (rho%of_r(k,1) < zero) rhoneg1 = rhoneg1-rho%of_r(k,1) !