Commit 09c2cd9
fix(spatial): fall back to PyTorch path on Blackwell (sm_120) GPUs when USE_COMPILED=True
monai._C (grid_pull) was not compiled with sm_120 (Blackwell) architecture support, causing spatial_resample to produce incorrect results on RTX 50-series GPUs when USE_COMPILED=True.

Add _compiled_unsupported() to detect compute capability major >= 12 at runtime and transparently fall back to the PyTorch-native affine_grid + grid_sample path, which is verified correct on sm_120.

Fixes test_flips_inverse_124 in tests.transforms.spatial.test_spatial_resampled on NVIDIA GeForce RTX 5090 (Blackwell, sm_120).
1 parent 19cab57 commit 09c2cd9

1 file changed: monai/transforms/spatial/functional.py (23 additions, 1 deletion)
```diff
@@ -54,6 +54,27 @@
 __all__ = ["spatial_resample", "orientation", "flip", "resize", "rotate", "zoom", "rotate90", "affine_func"]
 
 
+def _compiled_unsupported(device: torch.device) -> bool:
+    """
+    Return True if ``monai._C`` (the compiled C extension providing ``grid_pull``) is not
+    compiled with support for the given CUDA device's compute capability.
+
+    ``monai._C`` is built at install time against a fixed set of CUDA architectures.
+    NVIDIA Blackwell GPUs (sm_120, compute capability 12.x) and newer were not included in
+    the default ``TORCH_CUDA_ARCH_LIST`` when the MONAI slim image was originally built,
+    so executing ``grid_pull`` on those devices produces incorrect results. Falling back to
+    the PyTorch-native ``affine_grid`` + ``grid_sample`` path (``USE_COMPILED=False``) gives
+    correct output on all architectures.
+
+    The threshold (``major >= 12``) matches the first architecture family (Blackwell, sm_120)
+    that shipped after the highest sm supported in the current default build list (sm_90,
+    Hopper). Adjust this constant when ``monai._C`` is rebuilt with sm_120+ support.
+    """
+    if device.type != "cuda":
+        return False
+    return torch.cuda.get_device_properties(device).major >= 12
+
+
 def _maybe_new_metatensor(img, dtype=None, device=None):
     """create a metatensor with fresh metadata if track_meta is True otherwise convert img into a torch tensor"""
     return convert_to_tensor(
```
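The decision logic this helper feeds into can be exercised without CUDA hardware by stubbing the device properties. The sketch below is plain Python; the stand-ins `FAKE_PROPS` and the `SimpleNamespace` devices are illustrative assumptions, not MONAI or PyTorch API, but the `major >= 12` check mirrors the one added in the diff:

```python
from types import SimpleNamespace

# Illustrative stand-ins for torch.device and
# torch.cuda.get_device_properties (assumptions for this sketch only).
FAKE_PROPS = {
    "rtx5090": SimpleNamespace(major=12, minor=0),  # Blackwell, sm_120
    "h100": SimpleNamespace(major=9, minor=0),      # Hopper, sm_90
}

def compiled_unsupported(device) -> bool:
    # Non-CUDA devices never reach the compiled grid_pull kernel.
    if device.type != "cuda":
        return False
    # Compute capability 12.x (Blackwell) and newer are absent from the
    # build's TORCH_CUDA_ARCH_LIST, so the compiled path is unreliable there.
    return FAKE_PROPS[device.name].major >= 12

USE_COMPILED = True
blackwell = SimpleNamespace(type="cuda", name="rtx5090")
hopper = SimpleNamespace(type="cuda", name="h100")
cpu = SimpleNamespace(type="cpu", name="cpu")

# sm_120 falls back to the PyTorch-native path; sm_90 and CPU keep the
# originally requested behavior.
assert (USE_COMPILED and not compiled_unsupported(blackwell)) is False
assert (USE_COMPILED and not compiled_unsupported(hopper)) is True
assert (USE_COMPILED and not compiled_unsupported(cpu)) is True
```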
```diff
@@ -158,7 +179,8 @@ def spatial_resample(
     xform_shape = [-1] + in_sp_size
     img = img.reshape(xform_shape)
     img = img.to(dtype_pt)
-    if isinstance(mode, int) or USE_COMPILED:
+    _use_compiled = USE_COMPILED and not _compiled_unsupported(img.device)
+    if isinstance(mode, int) or _use_compiled:
         dst_xform = create_translate(spatial_rank, [float(d - 1) / 2 for d in spatial_size])
         xform = xform @ convert_to_dst_type(dst_xform, xform)[0]
         affine_xform = monai.transforms.Affine(
```
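The fallback path the commit selects is built on `torch.nn.functional.affine_grid` plus `grid_sample`. The minimal sketch below shows that path on a toy image with an identity transform; it is not MONAI's actual resampling code (which additionally tracks spatial metadata), just the underlying PyTorch primitives that work on any architecture:

```python
import torch
import torch.nn.functional as F

# Toy 4x4 single-channel image with a batch dimension: shape (N, C, H, W).
img = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)

# Identity affine theta of shape (N, 2, 3): the sampling grid equals the
# input grid, so resampling should reproduce the image.
theta = torch.tensor([[[1.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0]]])

grid = F.affine_grid(theta, size=list(img.shape), align_corners=True)
out = F.grid_sample(img, grid, mode="bilinear", align_corners=True)

# With an identity transform the output matches the input up to floating
# point; no compiled monai._C kernel is involved.
assert torch.allclose(out, img, atol=1e-5)
```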
