Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions cuda_core/cuda/core/_dlpack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,28 @@ cdef inline int setup_dl_tensor_layout(DLTensor* dl_tensor, object buf) except -
return 0


def classify_dl_device(buf) -> tuple[int, int]:
    """Classify a buffer into a DLPack (device_type, device_id) pair.

    ``buf`` must expose ``is_device_accessible``, ``is_host_accessible``,
    ``is_managed``, and ``device_id`` attributes.
    """
    cdef bint on_device = buf.is_device_accessible
    cdef bint on_host = buf.is_host_accessible
    if not (on_device or on_host):
        raise BufferError("buffer is neither device-accessible nor host-accessible")
    if not on_device:
        # Host-only memory: plain CPU allocation.
        return (_kDLCPU, 0)
    if not on_host:
        # Device-only memory lives on a specific GPU.
        return (_kDLCUDA, buf.device_id)
    # Accessible from both sides: either managed (unified) or pinned host memory.
    return (_kDLCUDAManaged if buf.is_managed else _kDLCUDAHost, 0)


cdef inline int setup_dl_tensor_device(DLTensor* dl_tensor, object buf) except -1:
    # Populate dl_tensor.device from the buffer's accessibility attributes.
    # buf should be a Buffer instance (or anything classify_dl_device accepts);
    # delegating to the shared classifier keeps this path consistent with
    # Buffer.__dlpack_device__, including the kDLCUDAManaged case.
    cdef DLDevice* device = &dl_tensor.device
    dev_type, dev_id = classify_dl_device(buf)
    device.device_type = <_DLDeviceType>dev_type
    device.device_id = <int32_t>dev_id
    return 0


Expand Down
1 change: 1 addition & 0 deletions cuda_core/cuda/core/_memory/_buffer.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cdef struct _MemAttrs:
int device_id
bint is_device_accessible
bint is_host_accessible
bint is_managed


cdef class Buffer:
Expand Down
22 changes: 11 additions & 11 deletions cuda_core/cuda/core/_memory/_buffer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ if sys.version_info >= (3, 12):
else:
BufferProtocol = object

from cuda.core._dlpack import DLDeviceType, make_py_capsule
from cuda.core._dlpack import classify_dl_device, make_py_capsule
from cuda.core._utils.cuda_utils import driver
from cuda.core._device import Device

Expand Down Expand Up @@ -323,16 +323,7 @@ cdef class Buffer:
return capsule

def __dlpack_device__(self) -> tuple[int, int]:
    """Return the DLPack ``(device_type, device_id)`` pair for this buffer.

    Delegates to the shared ``classify_dl_device`` helper so that the
    Buffer-level report and the DLPack capsule export path agree,
    including distinguishing managed (unified) memory from pinned host
    memory. Raises ``BufferError`` if the buffer is neither device- nor
    host-accessible.
    """
    return classify_dl_device(self)

def __buffer__(self, flags: int, /) -> memoryview:
# Support for Python-level buffer protocol as per PEP 688.
Expand Down Expand Up @@ -396,6 +387,12 @@ cdef class Buffer:
_init_mem_attrs(self)
return self._mem_attrs.is_host_accessible

@property
def is_managed(self) -> bool:
    """Return True if this buffer is CUDA managed (unified) memory, otherwise False."""
    # Lazily query and cache the driver's memory attributes on first access.
    _init_mem_attrs(self)
    return bool(self._mem_attrs.is_managed)

@property
def is_mapped(self) -> bool:
"""Return True if this buffer is mapped into the process via IPC."""
Expand Down Expand Up @@ -459,6 +456,7 @@ cdef inline int _query_memory_attrs(
out.is_host_accessible = True
out.is_device_accessible = False
out.device_id = -1
out.is_managed = False
elif (
is_managed
or memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_HOST
Expand All @@ -467,10 +465,12 @@ cdef inline int _query_memory_attrs(
out.is_host_accessible = True
out.is_device_accessible = True
out.device_id = device_id
out.is_managed = is_managed
elif memory_type == cydriver.CUmemorytype.CU_MEMORYTYPE_DEVICE:
out.is_host_accessible = False
out.is_device_accessible = True
out.device_id = device_id
out.is_managed = False
else:
with cython.gil:
raise ValueError(f"Unsupported memory type: {memory_type}")
Expand Down
20 changes: 4 additions & 16 deletions cuda_core/cuda/core/_memoryview.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

from ._dlpack cimport *
from ._dlpack import classify_dl_device
from libc.stdint cimport intptr_t
from cuda.core._layout cimport _StridedLayout, get_strides_ptr
from cuda.core._stream import Stream
Expand Down Expand Up @@ -590,8 +591,6 @@ cdef inline int _smv_get_dl_device(
cdef _DLDeviceType device_type
cdef int32_t device_id
cdef object buf
cdef bint d
cdef bint h
if view.dl_tensor != NULL:
device_type = view.dl_tensor.device.device_type
if device_type == _kDLCUDA:
Expand All @@ -601,20 +600,9 @@ cdef inline int _smv_get_dl_device(
device_id = 0
elif view.is_device_accessible:
buf = view.get_buffer()
d = buf.is_device_accessible
h = buf.is_host_accessible
if d and (not h):
device_type = _kDLCUDA
device_id = buf.device_id
elif d and h:
# We do not currently differentiate pinned vs managed here.
device_type = _kDLCUDAHost
device_id = 0
elif (not d) and h:
device_type = _kDLCPU
device_id = 0
else:
raise BufferError("buffer is neither device-accessible nor host-accessible")
dev_type, dev_id = classify_dl_device(buf)
device_type = <_DLDeviceType>dev_type
device_id = <int32_t>dev_id
else:
device_type = _kDLCPU
device_id = 0
Expand Down
21 changes: 19 additions & 2 deletions cuda_core/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def test_buffer_dunder_dlpack():
[
(DummyDeviceMemoryResource, (DLDeviceType.kDLCUDA, 0)),
(DummyHostMemoryResource, (DLDeviceType.kDLCPU, 0)),
(DummyUnifiedMemoryResource, (DLDeviceType.kDLCUDAHost, 0)),
(DummyUnifiedMemoryResource, (DLDeviceType.kDLCUDAManaged, 0)),
(DummyPinnedMemoryResource, (DLDeviceType.kDLCUDAHost, 0)),
],
)
Expand All @@ -579,7 +579,7 @@ def test_buffer_dlpack_failure_clean_up():
dummy_mr = NullMemoryResource()
buffer = dummy_mr.allocate(size=1024)
before = sys.getrefcount(buffer)
with pytest.raises(BufferError, match="invalid buffer"):
with pytest.raises(BufferError, match="buffer is neither device-accessible nor host-accessible"):
buffer.__dlpack__()
after = sys.getrefcount(buffer)
# we use the buffer refcount as sentinel for proper clean-up here,
Expand All @@ -588,6 +588,23 @@ def test_buffer_dlpack_failure_clean_up():
assert after == before


def test_managed_buffer_dlpack_roundtrip_device_type():
    """Verify that a managed Buffer round-trips through DLPack with kDLCUDAManaged."""
    dev = Device()
    dev.set_current()
    skip_if_managed_memory_unsupported(dev)
    managed_buf = DummyUnifiedMemoryResource(dev).allocate(size=1024)

    # The Buffer itself must classify as managed memory.
    expected = (DLDeviceType.kDLCUDAManaged, 0)
    assert managed_buf.__dlpack_device__() == expected

    # End-to-end path (Buffer -> DLPack capsule -> StridedMemoryView) must
    # preserve kDLCUDAManaged rather than downgrading to kDLCUDAHost.
    smv = StridedMemoryView.from_any_interface(managed_buf, stream_ptr=-1)
    assert smv.__dlpack_device__() == (int(DLDeviceType.kDLCUDAManaged), 0)


@pytest.mark.parametrize("use_device_object", [True, False])
def test_device_memory_resource_initialization(use_device_object):
"""Test that DeviceMemoryResource can be initialized successfully.
Expand Down
Loading