Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions deepmd/infer/deep_pot.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,11 @@ def eval(
aparam=aparam,
**kwargs,
)
# TODO: if the grid is requested, we can directly return it without reshaping to energy, force and virial. We can also consider to return the grid in a separate key in the results dict, instead of reshaping it to energy, force and virial.
if "grid" in kwargs:
result = results["density"].reshape(nframes, -1)
return result
Comment on lines +216 to +218
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Guard against grid=None before density extraction.

if "grid" in kwargs also matches grid=None, but the backend density branch only runs when grid is not None; this can make results["density"] missing and raise at runtime. Use the same predicate here (kwargs.get("grid") is not None).

Suggested fix
-        if "grid" in kwargs:
+        if kwargs.get("grid") is not None:
             result = results["density"].reshape(nframes, -1)
             return result
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/infer/deep_pot.py` around lines 216 - 218, The condition that returns
density currently uses if "grid" in kwargs which also matches grid=None and can
access missing results["density"]; change the predicate to check the actual
value (e.g., kwargs.get("grid") is not None) before reshaping results["density"]
in the function where nframes and results are used so the density branch only
runs when grid is not None.


energy = results["energy_redu"].reshape(nframes, 1)
force = results["energy_derv_r"].reshape(nframes, natoms, 3)
virial = results["energy_derv_c_redu"].reshape(nframes, 9)
Expand Down
7 changes: 6 additions & 1 deletion deepmd/pt/entrypoints/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,12 @@ def train(

# Initialize DDP
if os.environ.get("LOCAL_RANK") is not None:
dist.init_process_group(backend="cuda:nccl,cpu:gloo")
import datetime

timeout = datetime.timedelta(
seconds=18000
) # set a longer timeout for for large datasets or slow file systems
dist.init_process_group(backend="cuda:nccl,cpu:gloo", timeout=timeout)

trainer = get_trainer(
config,
Expand Down
95 changes: 90 additions & 5 deletions deepmd/pt/infer/deep_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,11 +406,7 @@ def eval(
coords, atom_types, len(atom_types.shape) > 1
)
request_defs = self._get_request_defs(atomic)
if "spin" not in kwargs or kwargs["spin"] is None:
out = self._eval_func(self._eval_model, numb_test, natoms)(
coords, cells, atom_types, fparam, aparam, request_defs, charge_spin
)
else:
if "spin" in kwargs and kwargs["spin"] is not None:
out = self._eval_func(self._eval_model_spin, numb_test, natoms)(
coords,
cells,
Expand All @@ -421,6 +417,21 @@ def eval(
request_defs,
charge_spin,
)
elif "grid" in kwargs and kwargs["grid"] is not None:
out = self._eval_func(self._eval_model_density, numb_test, natoms)(
coords,
cells,
atom_types,
np.array(kwargs["grid"]),
fparam,
aparam,
request_defs,
)
return {"density": out}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Return ndarray instead of tuple for "density" payload.

out here is a tuple from _eval_model_density (currently tuple(results)), so {"density": out} makes downstream callers receive a tuple, not an array. DeepPot.eval then calls .reshape(...) on this value and will fail.

Suggested fix
-            return {"density": out}
+            return {"density": out[0]}

Also applies to: 769-775

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/infer/deep_eval.py` at line 430, The "density" payload is returning
a tuple (from _eval_model_density/tuple(results)) which breaks DeepPot.eval when
it calls .reshape; convert the tuple to a NumPy array before returning. Locate
the return sites (the dict with "density": out around the return in deep_eval.py
and the similar block at lines ~769-775) and replace the tuple with a NumPy
ndarray (e.g., wrap results with numpy.array or ensure tuple(results) is created
as np.array(results)); add or use the existing import for numpy (np) so callers
receive an ndarray instead of a tuple.

else:
out = self._eval_func(self._eval_model, numb_test, natoms)(
coords, cells, atom_types, fparam, aparam, request_defs
)
Comment on lines +432 to +434
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical | ⚡ Quick win

Pass charge_spin in the default eval path.

_eval_model(...) requires charge_spin, but the non-spin/non-grid call omits it. This will raise TypeError for normal inference.

Suggested fix
-            out = self._eval_func(self._eval_model, numb_test, natoms)(
-                coords, cells, atom_types, fparam, aparam, request_defs
-            )
+            out = self._eval_func(self._eval_model, numb_test, natoms)(
+                coords, cells, atom_types, fparam, aparam, request_defs, charge_spin
+            )

Also applies to: 527-536

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/infer/deep_eval.py` around lines 432 - 434, The default
non-spin/non-grid evaluation call to self._eval_func(...)(...) omits the
required charge_spin argument for _eval_model; update the call site (the
invocation that constructs out via self._eval_func(self._eval_model, numb_test,
natoms)(...)) to include the charge_spin parameter (e.g., add charge_spin to the
argument list alongside coords, cells, atom_types, fparam, aparam,
request_defs), and apply the same change to the other similar block around the
527-536 region so both calls pass charge_spin into _eval_model.

return dict(
zip(
[x.name for x in request_defs],
Expand Down Expand Up @@ -688,6 +699,80 @@ def _eval_model_spin(
) # this is kinda hacky
return tuple(results)

def _eval_model_density(
self,
coords: np.ndarray,
cells: np.ndarray | None,
atom_types: np.ndarray,
grid: np.ndarray,
fparam: np.ndarray | None,
aparam: np.ndarray | None,
request_defs: list[OutputVariableDef],
) -> tuple[np.ndarray, ...]:
model = self.dp.to(DEVICE)

nframes = coords.shape[0]
if len(atom_types.shape) == 1:
natoms = len(atom_types)
atom_types = np.tile(atom_types, nframes).reshape(nframes, -1)
else:
natoms = len(atom_types[0])

coord_input = torch.tensor(
coords.reshape([nframes, natoms, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
type_input = torch.tensor(atom_types, dtype=torch.long, device=DEVICE)
grid_input = torch.tensor(
grid.reshape([nframes, -1, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
ngrid = grid_input.shape[1]
if cells is not None:
box_input = torch.tensor(
cells.reshape([nframes, 3, 3]),
dtype=GLOBAL_PT_FLOAT_PRECISION,
device=DEVICE,
)
else:
box_input = None
if fparam is not None:
fparam_input = to_torch_tensor(
fparam.reshape(nframes, self.get_dim_fparam())
)
else:
fparam_input = None
if aparam is not None:
aparam_input = to_torch_tensor(
aparam.reshape(nframes, natoms, self.get_dim_aparam())
)
else:
aparam_input = None

do_atomic_virial = any(
x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
)
batch_output = model(
coord_input,
type_input,
grid=grid_input,
box=box_input,
do_atomic_virial=do_atomic_virial,
fparam=fparam_input,
aparam=aparam_input,
)
if isinstance(batch_output, tuple):
batch_output = batch_output[0]

results = []
pt_name = "density"
density_shape = [nframes, ngrid]
out = batch_output[pt_name].reshape(density_shape).detach().cpu().numpy()
results.append(out)
return tuple(results)

def _get_output_shape(
self, odef: OutputVariableDef, nframes: int, natoms: int
) -> list[int]:
Expand Down
4 changes: 4 additions & 0 deletions deepmd/pt/loss/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from .charge import (
GridDensityLoss,
)
from .denoise import (
DenoiseLoss,
)
Expand Down Expand Up @@ -28,6 +31,7 @@
"EnergyHessianStdLoss",
"EnergySpinLoss",
"EnergyStdLoss",
"GridDensityLoss",
"PropertyLoss",
"TaskLoss",
"TensorLoss",
Expand Down
137 changes: 137 additions & 0 deletions deepmd/pt/loss/charge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
from typing import (
Any,
)

import torch

Comment thread
coderabbitai[bot] marked this conversation as resolved.
from deepmd.pt.loss.loss import (
TaskLoss,
)
from deepmd.pt.utils import (
env,
)
from deepmd.pt.utils.env import (
GLOBAL_PT_FLOAT_PRECISION,
)
from deepmd.utils.data import (
DataRequirementItem,
)


class GridDensityLoss(TaskLoss):
def __init__(
self,
starter_learning_rate: float = 1.0,
start_pref_d: float = 0.0,
limit_pref_d: float = 0.0,
inference: bool = False,
**kwargs: Any,
) -> None:
r"""Construct a layer to compute loss on grid density.

Parameters
----------
starter_learning_rate : float
The learning rate at the start of the training.
start_pref_d : float
The prefactor of charge density loss at the start of the training.
limit_pref_d : float
The prefactor of charge density loss at the end of the training.
inference : bool
If true, it will output all losses found in output, ignoring the pre-factors.
**kwargs
Other keyword arguments.
"""
super().__init__()
self.starter_learning_rate = starter_learning_rate
self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Fix density-loss activation logic on Line 44.

Using and here disables density loss whenever only one prefactor endpoint is zero (common for ramp-up/ramp-down schedules), so training can silently skip this objective.

Suggested fix
-        self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
+        self.has_d = (start_pref_d != 0.0 or limit_pref_d != 0.0) or inference
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
self.has_d = (start_pref_d != 0.0 and limit_pref_d != 0.0) or inference
self.has_d = (start_pref_d != 0.0 or limit_pref_d != 0.0) or inference
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@deepmd/pt/loss/charge.py` at line 44, The density-loss activation logic in
charge.py sets self.has_d incorrectly using AND between start_pref_d and
limit_pref_d so density loss is disabled when either endpoint is zero; change
the condition in the initializer that assigns self.has_d to use OR between the
endpoint checks (i.e., treat density active if either start_pref_d or
limit_pref_d is nonzero) while still preserving the existing inference flag
check (keep the overall OR with inference); locate the assignment to self.has_d
in charge.py and update the boolean expression accordingly.


self.start_pref_d = start_pref_d
self.limit_pref_d = limit_pref_d
self.inference = inference

def forward(
self,
input_dict: dict[str, torch.Tensor],
model: torch.nn.Module,
label: dict[str, torch.Tensor],
natoms: int,
learning_rate: float,
mae: bool = False,
) -> tuple[dict[str, torch.Tensor], torch.Tensor, dict[str, torch.Tensor]]:
"""Return loss on energy and force.

Parameters
----------
input_dict : dict[str, torch.Tensor]
Model inputs.
model : torch.nn.Module
Model to be used to output the predictions.
label : dict[str, torch.Tensor]
Labels.
natoms : int
The local atom number.

Returns
-------
model_pred: dict[str, torch.Tensor]
Model predictions.
loss: torch.Tensor
Loss for model to minimize.
more_loss: dict[str, torch.Tensor]
Other losses for display.
"""
model_pred = model(**input_dict)
coef = learning_rate / self.starter_learning_rate
pref_d = self.limit_pref_d + (self.start_pref_d - self.limit_pref_d) * coef

loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0]
more_loss = {}
# more_loss['log_keys'] = [] # showed when validation on the fly
# more_loss['test_keys'] = [] # showed when doing dp test
atom_norm = 1.0 / natoms
if self.has_d and "density" in model_pred and "density" in label:
density_pred = model_pred["density"]
density_label = label["density"]
find_density = label.get("find_density", 0.0)
pref_d = pref_d * find_density
density_pred_reshape = density_pred.reshape(-1)
density_label_reshape = density_label.reshape(-1)
l2_density_loss = torch.square(
density_label_reshape - density_pred_reshape
).mean()
rmse_d = l2_density_loss.sqrt()
more_loss["rmse_d"] = self.display_if_exist(rmse_d.detach(), find_density)
l1_density_loss = torch.abs(
density_label_reshape - density_pred_reshape
).mean()
loss += (pref_d * l1_density_loss).to(GLOBAL_PT_FLOAT_PRECISION)
mae_d = l1_density_loss
more_loss["mae_d"] = self.display_if_exist(mae_d.detach(), find_density)
return model_pred, loss, more_loss

@property
def label_requirement(self) -> list[DataRequirementItem]:
"""Return data label requirements needed for this loss calculation."""
label_requirement = []
label_requirement.append(
DataRequirementItem(
"grid",
ndof=3,
atomic=True, # the grid is defined for each atom, so it is atomic
must=True,
high_prec=True,
)
)
if self.has_d:
label_requirement.append(
DataRequirementItem(
"density",
ndof=1,
atomic=True,
must=False,
high_prec=True,
)
)
return label_requirement
4 changes: 4 additions & 0 deletions deepmd/pt/model/atomic_model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
from .base_atomic_model import (
BaseAtomicModel,
)
from .density_atomic_model import (
DPDensityAtomicModel,
)
from .dipole_atomic_model import (
DPDipoleAtomicModel,
)
Expand Down Expand Up @@ -47,6 +50,7 @@
"BaseAtomicModel",
"DPAtomicModel",
"DPDOSAtomicModel",
"DPDensityAtomicModel",
"DPDipoleAtomicModel",
"DPEnergyAtomicModel",
"DPPolarAtomicModel",
Expand Down
Loading