forked from NVIDIA/cuda-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_dlpack.pyx
More file actions
172 lines (146 loc) · 5.83 KB
/
_dlpack.pyx
File metadata and controls
172 lines (146 loc) · 5.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
from enum import IntEnum
cdef void pycapsule_deleter(object capsule) noexcept:
    """Capsule destructor: free the managed tensor if it was never consumed.

    DLPack renames a capsule once a consumer takes ownership, so a capsule
    that still carries one of the *_UNUSED_NAME names was never consumed and
    this deleter must release its tensor. A consumed (renamed) capsule fails
    both PyCapsule_IsValid checks and nothing is freed here — the consumer
    is then responsible for calling the tensor's own deleter.
    """
    cdef DLManagedTensor* dlm_tensor
    cdef DLManagedTensorVersioned* dlm_tensor_ver
    # Do not invoke the deleter on a used capsule.
    if cpython.PyCapsule_IsValid(
            capsule, DLPACK_TENSOR_UNUSED_NAME):
        # Unversioned tensor still owned by the capsule.
        dlm_tensor = <DLManagedTensor*>(
            cpython.PyCapsule_GetPointer(
                capsule, DLPACK_TENSOR_UNUSED_NAME))
        # DLPack allows a NULL deleter meaning "nothing to free".
        if dlm_tensor.deleter:
            dlm_tensor.deleter(dlm_tensor)
    elif cpython.PyCapsule_IsValid(
            capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME):
        # Versioned (DLPack >= 1.0) tensor still owned by the capsule.
        dlm_tensor_ver = <DLManagedTensorVersioned*>(
            cpython.PyCapsule_GetPointer(
                capsule, DLPACK_VERSIONED_TENSOR_UNUSED_NAME))
        if dlm_tensor_ver.deleter:
            dlm_tensor_ver.deleter(dlm_tensor_ver)
cdef void deleter(DLManagedTensor* tensor) noexcept with gil:
    """Deleter for an unversioned DLManagedTensor produced by this module.

    Frees the shape/strides allocation, drops the Python reference held in
    ``manager_ctx``, then frees the struct itself. Safe to call with NULL.
    Needs the GIL for the Py_DECREF.
    """
    if tensor == NULL:
        return
    if tensor.dl_tensor.shape != NULL:
        # shape and strides share one allocation; freeing shape frees both.
        stdlib.free(tensor.dl_tensor.shape)
    if tensor.manager_ctx != NULL:
        # Release the reference taken on the owning buffer at capsule creation.
        cpython.Py_DECREF(<object>tensor.manager_ctx)
        tensor.manager_ctx = NULL
    stdlib.free(tensor)
cdef void versioned_deleter(DLManagedTensorVersioned* tensor) noexcept with gil:
    """Deleter for a DLManagedTensorVersioned produced by this module.

    Mirrors ``deleter``: free the shape/strides allocation, drop the Python
    reference in ``manager_ctx``, free the struct. Safe to call with NULL.
    Needs the GIL for the Py_DECREF.
    """
    if tensor == NULL:
        return
    if tensor.dl_tensor.shape != NULL:
        # shape and strides share one allocation; freeing shape frees both.
        stdlib.free(tensor.dl_tensor.shape)
    if tensor.manager_ctx != NULL:
        # Release the reference taken on the owning buffer at capsule creation.
        cpython.Py_DECREF(<object>tensor.manager_ctx)
        tensor.manager_ctx = NULL
    stdlib.free(tensor)
cdef inline DLManagedTensorVersioned* allocate_dlm_tensor_versioned() except? NULL:
    """Allocate a DLManagedTensorVersioned with shape/manager_ctx zeroed.

    Only ``dl_tensor.shape`` and ``manager_ctx`` are initialized (to NULL);
    the caller fills in the remaining fields and ultimately frees the struct
    (normally via ``versioned_deleter``).

    Raises:
        MemoryError: if the C allocation fails.
    """
    cdef DLManagedTensorVersioned* dlm_tensor_ver = <DLManagedTensorVersioned*>(
        stdlib.malloc(sizeof(DLManagedTensorVersioned)))
    # malloc reports failure by returning NULL — it never raises — so the
    # previous try/except could not catch an OOM; the subsequent member
    # writes would have dereferenced NULL. Check explicitly instead.
    if dlm_tensor_ver == NULL:
        raise MemoryError()
    dlm_tensor_ver.dl_tensor.shape = NULL
    dlm_tensor_ver.manager_ctx = NULL
    return dlm_tensor_ver
cdef inline DLManagedTensor* allocate_dlm_tensor() except? NULL:
    """Allocate a DLManagedTensor with shape/manager_ctx zeroed.

    Only ``dl_tensor.shape`` and ``manager_ctx`` are initialized (to NULL);
    the caller fills in the remaining fields and ultimately frees the struct
    (normally via ``deleter``).

    Raises:
        MemoryError: if the C allocation fails.
    """
    cdef DLManagedTensor* dlm_tensor = <DLManagedTensor*>(
        stdlib.malloc(sizeof(DLManagedTensor)))
    # malloc reports failure by returning NULL — it never raises — so the
    # previous try/except could not catch an OOM; the subsequent member
    # writes would have dereferenced NULL. Check explicitly instead.
    if dlm_tensor == NULL:
        raise MemoryError()
    dlm_tensor.dl_tensor.shape = NULL
    dlm_tensor.manager_ctx = NULL
    return dlm_tensor
cdef inline int setup_dl_tensor_layout(DLTensor* dl_tensor, object buf) except -1:
    """Describe *buf* as a flat 1-D array of ``buf.size`` elements.

    A single allocation holds both values: slot 0 is the shape, slot 1 the
    stride. Because ``strides`` points one element past ``shape``, the
    matching deleters only need to free ``shape`` to release both.

    Raises:
        MemoryError: if the shape/strides allocation fails.
    """
    cdef int64_t* dims = <int64_t*>stdlib.malloc(2 * sizeof(int64_t))
    if dims == NULL:
        raise MemoryError()
    dims[0] = <int64_t>buf.size
    # DLPack v1.2+ requires non-NULL strides for ndim != 0.
    dims[1] = 1
    dl_tensor.ndim = 1
    dl_tensor.shape = dims
    dl_tensor.strides = dims + 1
    dl_tensor.byte_offset = 0
    return 0
def classify_dl_device(buf) -> tuple[int, int]:
    """Classify a buffer into a DLPack (device_type, device_id) pair.

    ``buf`` must expose ``is_device_accessible``, ``is_host_accessible``,
    ``is_managed``, and ``device_id`` attributes.

    Raises:
        BufferError: if the buffer is accessible from neither device nor host.
    """
    cdef bint dev_ok = buf.is_device_accessible
    cdef bint host_ok = buf.is_host_accessible
    if dev_ok:
        if host_ok:
            # Visible from both sides: managed (unified) vs. pinned host memory.
            # Both are addressed from the host, hence device_id 0.
            kind = _kDLCUDAManaged if buf.is_managed else _kDLCUDAHost
            return (kind, 0)
        # Device-only memory carries its owning device's ordinal.
        return (_kDLCUDA, buf.device_id)
    if host_ok:
        return (_kDLCPU, 0)
    raise BufferError("buffer is neither device-accessible nor host-accessible")
cdef inline int setup_dl_tensor_device(DLTensor* dl_tensor, object buf) except -1:
    """Fill ``dl_tensor.device`` from *buf* using ``classify_dl_device``."""
    device_type, device_id = classify_dl_device(buf)
    dl_tensor.device.device_type = <_DLDeviceType>device_type
    dl_tensor.device.device_id = <int32_t>device_id
    return 0
cdef inline int setup_dl_tensor_dtype(DLTensor* dl_tensor) except -1 nogil:
    """Type the tensor as single-lane signed 8-bit integers (raw bytes)."""
    cdef DLDataType* dt = &dl_tensor.dtype
    dt.lanes = <uint16_t>1
    dt.bits = <uint8_t>8
    dt.code = <uint8_t>kDLInt
    return 0
cpdef object make_py_capsule(object buf, bint versioned):
    """Wrap *buf* in a DLPack PyCapsule.

    Builds a DLManagedTensor (``versioned=False``) or DLManagedTensorVersioned
    (``versioned=True``) describing *buf* as a 1-D int8 tensor, holding a
    Python reference to *buf* in ``manager_ctx``, and returns it inside a
    capsule named with the matching *_UNUSED_NAME. If anything fails before
    the capsule is created, the partially built tensor (and its reference to
    *buf*) is released before re-raising.

    ``buf`` is expected to expose ``handle`` (device/host pointer as int),
    ``size``, and the accessibility attributes read by classify_dl_device.
    """
    cdef DLManagedTensor* dlm_tensor = NULL
    cdef DLManagedTensorVersioned* dlm_tensor_ver = NULL
    cdef DLTensor* dl_tensor
    cdef void* tensor_ptr
    cdef const char* capsule_name
    cdef object ret = None
    try:
        if versioned:
            dlm_tensor_ver = allocate_dlm_tensor_versioned()
            # Transfer the reference to manager_ctx
            cpython.Py_INCREF(buf)
            dlm_tensor_ver.manager_ctx = <void*>buf
            dlm_tensor_ver.deleter = versioned_deleter
            dlm_tensor_ver.version.major = DLPACK_MAJOR_VERSION
            dlm_tensor_ver.version.minor = DLPACK_MINOR_VERSION
            dlm_tensor_ver.flags = 0
            dl_tensor = &dlm_tensor_ver.dl_tensor
            tensor_ptr = dlm_tensor_ver
            capsule_name = DLPACK_VERSIONED_TENSOR_UNUSED_NAME
        else:
            dlm_tensor = allocate_dlm_tensor()
            # Transfer the reference to manager_ctx
            cpython.Py_INCREF(buf)
            dlm_tensor.manager_ctx = <void*>buf
            dlm_tensor.deleter = deleter
            dl_tensor = &dlm_tensor.dl_tensor
            tensor_ptr = dlm_tensor
            capsule_name = DLPACK_TENSOR_UNUSED_NAME
        # int() accepts any handle object convertible to an integer address.
        dl_tensor.data = <void*><intptr_t>(int(buf.handle))
        setup_dl_tensor_layout(dl_tensor, buf)
        setup_dl_tensor_device(dl_tensor, buf)
        setup_dl_tensor_dtype(dl_tensor)
        ret = cpython.PyCapsule_New(tensor_ptr, capsule_name, pycapsule_deleter)
    except:
        # Only clean up if the capsule never came into existence; once it
        # does, pycapsule_deleter owns the tensor. Both cleanup functions
        # tolerate NULL, so the unused branch's pointer is safe to pass.
        if ret is None:
            deleter(dlm_tensor)
            versioned_deleter(dlm_tensor_ver)
        raise
    return ret
class DLDeviceType(IntEnum):
    """Python-level mirror of the DLPack device-type constants used here."""
    kDLCPU = _kDLCPU
    kDLCUDA = _kDLCUDA
    kDLCUDAHost = _kDLCUDAHost
    kDLCUDAManaged = _kDLCUDAManaged