diff --git a/cuda_core/cuda/core/_dlpack.pyx b/cuda_core/cuda/core/_dlpack.pyx index 5547216891..371ced011b 100644 --- a/cuda_core/cuda/core/_dlpack.pyx +++ b/cuda_core/cuda/core/_dlpack.pyx @@ -88,20 +88,28 @@ cdef inline int setup_dl_tensor_layout(DLTensor* dl_tensor, object buf) except - return 0 +def classify_dl_device(buf) -> tuple[int, int]: + """Classify a buffer into a DLPack (device_type, device_id) pair. + + ``buf`` must expose ``is_device_accessible``, ``is_host_accessible``, + ``is_managed``, and ``device_id`` attributes. + """ + cdef bint d = buf.is_device_accessible + cdef bint h = buf.is_host_accessible + if d and not h: + return (_kDLCUDA, buf.device_id) + if d and h: + return (_kDLCUDAManaged if buf.is_managed else _kDLCUDAHost, 0) + if not d and h: + return (_kDLCPU, 0) + raise BufferError("buffer is neither device-accessible nor host-accessible") + + cdef inline int setup_dl_tensor_device(DLTensor* dl_tensor, object buf) except -1: cdef DLDevice* device = &dl_tensor.device - # buf should be a Buffer instance - if buf.is_device_accessible and not buf.is_host_accessible: - device.device_type = _kDLCUDA - device.device_id = buf.device_id - elif buf.is_device_accessible and buf.is_host_accessible: - device.device_type = _kDLCUDAHost - device.device_id = 0 - elif not buf.is_device_accessible and buf.is_host_accessible: - device.device_type = _kDLCPU - device.device_id = 0 - else: # not buf.is_device_accessible and not buf.is_host_accessible - raise BufferError("invalid buffer") + dev_type, dev_id = classify_dl_device(buf) + device.device_type = <_DLDeviceType>dev_type + device.device_id = dev_id return 0 diff --git a/cuda_core/cuda/core/_memory/_buffer.pxd b/cuda_core/cuda/core/_memory/_buffer.pxd index 91c0cfe24a..04b5707e18 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pxd +++ b/cuda_core/cuda/core/_memory/_buffer.pxd @@ -12,6 +12,7 @@ cdef struct _MemAttrs: int device_id bint is_device_accessible bint is_host_accessible + bint is_managed cdef class Buffer: diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index b836972f5f..ec871ddc11 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -34,7 +34,7 @@ if sys.version_info >= (3, 12): else: BufferProtocol = object -from cuda.core._dlpack import DLDeviceType, make_py_capsule +from cuda.core._dlpack import classify_dl_device, make_py_capsule from cuda.core._utils.cuda_utils import driver from cuda.core._device import Device @@ -323,16 +323,7 @@ cdef class Buffer: return capsule def __dlpack_device__(self) -> tuple[int, int]: - cdef bint d = self.is_device_accessible - cdef bint h = self.is_host_accessible - if d and (not h): - return (DLDeviceType.kDLCUDA, self.device_id) - if d and h: - # TODO: this can also be kDLCUDAManaged, we need more fine-grained checks - return (DLDeviceType.kDLCUDAHost, 0) - if (not d) and h: - return (DLDeviceType.kDLCPU, 0) - raise BufferError("buffer is neither device-accessible nor host-accessible") + return classify_dl_device(self) def __buffer__(self, flags: int, /) -> memoryview: # Support for Python-level buffer protocol as per PEP 688. @@ -396,6 +387,12 @@ cdef class Buffer: _init_mem_attrs(self) return self._mem_attrs.is_host_accessible + @property + def is_managed(self) -> bool: + """Return True if this buffer is CUDA managed (unified) memory, otherwise False.""" + _init_mem_attrs(self) + return self._mem_attrs.is_managed + @property def is_mapped(self) -> bool: """Return True if this buffer is mapped into the process via IPC.""" @@ -459,6 +456,7 @@ cdef inline int _query_memory_attrs( out.is_host_accessible = True out.is_device_accessible = False out.device_id = -1 + out.is_managed = False elif ( is_managed or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST @@ -467,10 +465,12 @@ cdef inline int _query_memory_attrs( out.is_host_accessible = True out.is_device_accessible = True out.device_id = device_id + out.is_managed = is_managed elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE: out.is_host_accessible = False out.is_device_accessible = True out.device_id = device_id + out.is_managed = False else: with cython.gil: raise ValueError(f"Unsupported memory type: {memory_type}") diff --git a/cuda_core/cuda/core/_memoryview.pyx b/cuda_core/cuda/core/_memoryview.pyx index 7dc32b7ec7..e0439ef23c 100644 --- a/cuda_core/cuda/core/_memoryview.pyx +++ b/cuda_core/cuda/core/_memoryview.pyx @@ -5,6 +5,7 @@ from __future__ import annotations from ._dlpack cimport * +from ._dlpack import classify_dl_device from libc.stdint cimport intptr_t from cuda.core._layout cimport _StridedLayout, get_strides_ptr from cuda.core._stream import Stream @@ -590,8 +591,6 @@ cdef inline int _smv_get_dl_device( cdef _DLDeviceType device_type cdef int32_t device_id cdef object buf - cdef bint d - cdef bint h if view.dl_tensor != NULL: device_type = view.dl_tensor.device.device_type if device_type == _kDLCUDA: @@ -601,20 +600,9 @@ cdef inline int _smv_get_dl_device( device_id = 0 elif view.is_device_accessible: buf = view.get_buffer() - d = buf.is_device_accessible - h = buf.is_host_accessible - if d and (not h): - device_type = _kDLCUDA - device_id = buf.device_id - elif d and h: - # We do not currently differentiate pinned vs managed here. - device_type = _kDLCUDAHost - device_id = 0 - elif (not d) and h: - device_type = _kDLCPU - device_id = 0 - else: - raise BufferError("buffer is neither device-accessible nor host-accessible") + dev_type, dev_id = classify_dl_device(buf) + device_type = <_DLDeviceType>dev_type + device_id = dev_id else: device_type = _kDLCPU device_id = 0 diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index 8005d3ce6c..a8e44a7946 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -556,7 +556,7 @@ def test_buffer_dunder_dlpack(): [ (DummyDeviceMemoryResource, (DLDeviceType.kDLCUDA, 0)), (DummyHostMemoryResource, (DLDeviceType.kDLCPU, 0)), - (DummyUnifiedMemoryResource, (DLDeviceType.kDLCUDAHost, 0)), + (DummyUnifiedMemoryResource, (DLDeviceType.kDLCUDAManaged, 0)), (DummyPinnedMemoryResource, (DLDeviceType.kDLCUDAHost, 0)), ], ) @@ -579,7 +579,7 @@ def test_buffer_dlpack_failure_clean_up(): dummy_mr = NullMemoryResource() buffer = dummy_mr.allocate(size=1024) before = sys.getrefcount(buffer) - with pytest.raises(BufferError, match="invalid buffer"): + with pytest.raises(BufferError, match="buffer is neither device-accessible nor host-accessible"): buffer.__dlpack__() after = sys.getrefcount(buffer) # we use the buffer refcount as sentinel for proper clean-up here, @@ -588,6 +588,23 @@ def test_buffer_dlpack_failure_clean_up(): assert after == before +def test_managed_buffer_dlpack_roundtrip_device_type(): + """Verify that a managed Buffer round-trips through DLPack with kDLCUDAManaged.""" + device = Device() + device.set_current() + skip_if_managed_memory_unsupported(device) + mr = DummyUnifiedMemoryResource(device) + buf = mr.allocate(size=1024) + + # Buffer-level classification should report managed. + assert buf.__dlpack_device__() == (DLDeviceType.kDLCUDAManaged, 0) + + # The end-to-end path: Buffer -> DLPack capsule -> StridedMemoryView + # must preserve kDLCUDAManaged rather than downgrading to kDLCUDAHost. + view = StridedMemoryView.from_any_interface(buf, stream_ptr=-1) + assert view.__dlpack_device__() == (int(DLDeviceType.kDLCUDAManaged), 0) + + @pytest.mark.parametrize("use_device_object", [True, False]) def test_device_memory_resource_initialization(use_device_object): """Test that DeviceMemoryResource can be initialized successfully.