Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 32 additions & 24 deletions src/shim/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

namespace {

const auto heap_page_size = 64ul * 1024 * 1024;

uint8_t
use_to_fw_debug_type(uint8_t use)
{
Expand Down Expand Up @@ -88,11 +86,6 @@ page_size_roundup(size_t size)
return (size + page_size - 1) & ~(page_size - 1);
}

// Round size up to the next multiple of heap_page_size.
// The mask arithmetic relies on heap_page_size being a power of two.
size_t
heap_page_size_roundup(size_t size)
{
  const auto page_mask = heap_page_size - 1;
  const auto padded = size + page_mask;
  return padded & ~page_mask;
}

std::string
use_flag_to_name(uint32_t use)
Expand Down Expand Up @@ -201,12 +194,7 @@ is_driver_pin_arg_bo()
return drv_pin;
}

// Address alignment required for a BO of the given type.
uint64_t
bo_addr_align(int type)
{
  // Only the device memory heap must sit on a heap_page_size boundary;
  // every other BO type can be byte aligned.
  if (type == AMDXDNA_BO_DEV_HEAP)
    return heap_page_size;
  return 1;
}


int
bo_flags_to_type(uint64_t bo_flags, bool has_dev_mem)
Expand Down Expand Up @@ -317,12 +305,11 @@ alloc(const pdev *dev, uint64_t dev_offset, size_t size)
//

drm_bo::
drm_bo(const pdev& pdev, size_t size, uint32_t type)
drm_bo(const pdev& pdev, size_t size, uint32_t type, size_t alignment)
: m_pdev(pdev), m_size(size)
{
auto align = bo_addr_align(type);
bo_info arg = {
.xdna_addr_align = (align == 1 ? 0 : align),
.xdna_addr_align = (alignment == 1 ? 0 : alignment),
.size = m_size,
.type = type,
};
Expand Down Expand Up @@ -398,6 +385,23 @@ buffer(const pdev& dev, size_t size, int type)
{
}

// Construct an expandable buffer.
//   dev          - device the buffer belongs to
//   initial_size - size requested from expand() right away
//   max_size     - total size the buffer may ever grow to (m_total_size)
//   type         - BO type; AMDXDNA_BO_INVALID is rejected with EINVAL
//   alignment    - alignment used for the mmap range and for each expansion
// NOTE(review): expand() rounds sizes with a bit mask on m_alignment, so
// alignment presumably must be a non-zero power of two; it is not validated
// here - confirm against callers.
buffer::
buffer(const pdev& dev, size_t initial_size, size_t max_size,
int type, size_t alignment)
: m_pdev(dev)
, m_type(type)
, m_alignment(alignment)
, m_total_size(max_size)
, m_cur_size(0)
{
if (m_type == AMDXDNA_BO_INVALID)
shim_err(EINVAL, "Bad BO type.");

// Prepare the mmap range for the maximum size up front, then obtain only
// the initial portion; later expand() calls grow the buffer further.
m_range_addr = std::make_unique<mmap_ptr>(m_total_size, m_alignment);
expand(initial_size);
shim_debug("Created expandable %s", describe().c_str());
}

buffer::
buffer(const pdev& dev, size_t size, int type, void *uptr)
: m_pdev(dev)
Expand All @@ -420,7 +424,7 @@ buffer(const pdev& dev, size_t size, int type, void *uptr)
shim_err(EINVAL, "User pointer BO must be AMDXDNA_BO_SHARE type.");

// Prepare the mmap range for the entire buffer
m_range_addr = std::make_unique<mmap_ptr>(m_total_size, bo_addr_align(m_type));
m_range_addr = std::make_unique<mmap_ptr>(m_total_size, m_alignment);

// Obtain the buffer
expand(m_total_size);
Expand All @@ -436,7 +440,7 @@ buffer(const pdev& dev, xrt_core::shared_handle::export_handle ehdl)

m_total_size = m_cur_size = bo->m_size;
// Prepare the mmap range for the entire buffer
m_range_addr = std::make_unique<mmap_ptr>(m_total_size, bo_addr_align(m_type));
m_range_addr = std::make_unique<mmap_ptr>(m_total_size, m_alignment);

mmap_drm_bo(bo.get());
m_bos.push_back(std::move(bo));
Expand All @@ -447,19 +451,23 @@ void
buffer::
expand(size_t size)
{
size = (m_type == AMDXDNA_BO_DEV_HEAP) ? heap_page_size_roundup(size) : size;
size = (size + m_alignment - 1) & ~(m_alignment - 1);

if (m_cur_size >= m_total_size)
shim_err(ENOSPC, "Heap at max size %zu, can't expand further", m_total_size);

if (size > m_total_size - m_cur_size)
size = m_total_size - m_cur_size;

auto cur_sz = m_cur_size;
auto new_sz = size + m_cur_size;
shim_debug("Expanding BO from %ld to %ld", cur_sz, new_sz);

if (new_sz > m_total_size)
shim_err(EINVAL, "Can't expand BO beyond total size %ld", m_total_size);
shim_debug("Expanding BO from %zu to %zu", cur_sz, new_sz);

std::unique_ptr<drm_bo> bo;
if (m_uptr)
bo = std::make_unique<drm_bo>(m_pdev, size, m_uptr);
else
bo = std::make_unique<drm_bo>(m_pdev, size, m_type);
bo = std::make_unique<drm_bo>(m_pdev, size, m_type, m_alignment);
mmap_drm_bo(bo.get());

m_bos.push_back(std::move(bo));
Expand Down
5 changes: 4 additions & 1 deletion src/shim/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class mmap_ptr {

class drm_bo {
public:
drm_bo(const pdev& pdev, size_t size, uint32_t type);
drm_bo(const pdev& pdev, size_t size, uint32_t type, size_t alignment = 1);
drm_bo(const pdev& pdev, size_t size, void *uptr);
drm_bo(const pdev& pdev, xrt_core::shared_handle::export_handle ehdl);
~drm_bo();
Expand All @@ -54,6 +54,8 @@ class buffer : public xrt_core::buffer_handle
{
public:
buffer(const pdev& dev, size_t size, int type);
buffer(const pdev& dev, size_t initial_size, size_t max_size,
int type, size_t alignment);
buffer(const pdev& dev, size_t size, uint64_t flags);
buffer(const pdev& dev, size_t size, void *uptr, uint64_t flags);
buffer(const pdev& dev, xrt_core::shared_handle::export_handle ehdl);
Expand Down Expand Up @@ -137,6 +139,7 @@ class buffer : public xrt_core::buffer_handle
std::vector< std::unique_ptr<drm_bo> > m_bos;
void *m_uptr = nullptr;
int m_type = AMDXDNA_BO_INVALID;
size_t m_alignment = 1;
size_t m_total_size = 0;
size_t m_cur_size = 0;
};
Expand Down
37 changes: 23 additions & 14 deletions src/shim/kmq/pcidev.cpp
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2022-2025, Advanced Micro Devices, Inc. All rights reserved.

#include <algorithm>

#include "../buffer.h"
#include "pcidev.h"
#include "core/common/config_reader.h"

namespace {

// Device memory heap needs to be multiple of 64MB page.
const size_t heap_page_size = (64 << 20);

unsigned int
get_heap_num_pages()
{
Expand All @@ -24,13 +23,15 @@ get_heap_num_pages()

namespace shim_xdna {

constexpr size_t heap_page_size = 64ul * 1024 * 1024;

void
pdev_kmq::
on_first_open() const
{
auto heap_sz = heap_page_size * get_heap_num_pages();
// Alloc device memory on first device open.
m_dev_heap_bo = std::make_unique<buffer>(*this, heap_sz, AMDXDNA_BO_DEV_HEAP);
const size_t max_heap_sz = 512UL << 20;
auto heap_sz = std::min(heap_page_size * get_heap_num_pages(), max_heap_sz);
m_dev_heap_bo = std::make_unique<buffer>(*this, heap_sz, max_heap_sz, AMDXDNA_BO_DEV_HEAP, heap_page_size);
}

void
Expand Down Expand Up @@ -78,15 +79,23 @@ create_drm_bo(bo_info *arg) const
}

// Dynamically expanding heap buffer when allocating device BO.
// Expand one heap_page_size chunk at a time and retry until
// the allocation succeeds or the heap is maxed out.
// e.g. In a QEMU guest, virtio-gpu memory limits the number of SG entries
// per allocation, so the heap is expanded by at most 64MB each time
// to avoid failure.
// We need to hold the lock while trying to allocate a DEV BO, as
// it is possible that the heap is being expanded by another thread.
const std::lock_guard<std::mutex> lock(m_lock);
try {
drv_ioctl(drv_ioctl_cmd::create_bo, arg);
} catch (const xrt_core::system_error& ex) {
if (ex.get_code() != ENOMEM)
throw;
// Expanding current heap size and try one more time.
m_dev_heap_bo->expand(arg->size);
drv_ioctl(drv_ioctl_cmd::create_bo, arg);
for (;;) {
try {
drv_ioctl(drv_ioctl_cmd::create_bo, arg);
return;
} catch (const xrt_core::system_error& ex) {
if (ex.get_code() != EAGAIN)
throw;
m_dev_heap_bo->expand(heap_page_size);
}
}
}

Expand Down
87 changes: 71 additions & 16 deletions src/vxdna/src/vaccel_amdxdna.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ vxdna_bo(int ctx_fd_in, const struct amdxdna_ccmd_create_bo_req *req)
vxdna_dbg("Create bo: ctx_fd=%d, type=%d, size=%lu", m_ctx_fd, m_bo_type, m_size);
ret = ioctl(m_ctx_fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &args);
if (ret)
VACCEL_THROW_MSG(-errno, "Create bo failed ret %d", ret);
VACCEL_THROW_MSG(-errno, "Create bo failed ret %d, errno %d, %s",
ret, errno, strerror(errno));

m_bo_handle = args.handle;
bo_info.handle = m_bo_handle;
Expand All @@ -90,7 +91,8 @@ vxdna_bo(int ctx_fd_in, const struct amdxdna_ccmd_create_bo_req *req)

vxdna_bo::
vxdna_bo(const std::shared_ptr<vaccel_resource> &res, int ctx_fd_in,
const struct amdxdna_ccmd_create_bo_req *req)
const struct amdxdna_ccmd_create_bo_req *req,
void *mmap_target)
: m_opaque_handle(res->get_opaque_handle())
{
struct amdxdna_drm_get_bo_info bo_info = {};
Expand Down Expand Up @@ -171,18 +173,25 @@ vxdna_bo(const std::shared_ptr<vaccel_resource> &res, int ctx_fd_in,

vxdna_dbg("mmap is required for handle: res_id=%u, handle=%u, opaque_handle=%d, vaddr=%lx, xdna_addr=%lx",
res->get_res_id(), m_bo_handle, res->get_opaque_handle(), m_vaddr, m_xdna_addr);
// mmap is required for non-dev BOs
uint64_t resv_vaddr = 0, resv_size = 0, va_to_map = 0;
void *resv_va = nullptr;
int flags = MAP_SHARED | MAP_LOCKED;
if (req->map_align) {
if (mmap_target) {
va_to_map = reinterpret_cast<uint64_t>(mmap_target);
if (req->map_align && (va_to_map & (req->map_align - 1))) {
if (has_created_bo)
close_gem_handle(m_ctx_fd, m_bo_handle);
VACCEL_THROW_MSG(-EINVAL,
"mmap_target %p not aligned to 0x%lx",
mmap_target, (unsigned long)req->map_align);
}
flags |= MAP_FIXED;
} else if (req->map_align) {
resv_va = ::mmap(0, m_map_size + req->map_align, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (resv_va == MAP_FAILED) {
// Clean up if we created the BO
if (has_created_bo) {
if (has_created_bo)
close_gem_handle(m_ctx_fd, m_bo_handle);
}
VACCEL_THROW_MSG(-ENOMEM, "Reserve vaddr range failed, map_align=%zu", req->map_align);
}

Expand All @@ -198,22 +207,21 @@ vxdna_bo(const std::shared_ptr<vaccel_resource> &res, int ctx_fd_in,
int saved_errno = errno;
if (resv_va && resv_va != MAP_FAILED)
::munmap(resv_va, resv_size);
// Clean up if we created the BO
if (has_created_bo) {
if (has_created_bo)
close_gem_handle(m_ctx_fd, m_bo_handle);
}
VACCEL_THROW_MSG(-saved_errno,
"Map bo failed, errno %d, %s, to map startaddr 0x%lx, map_offset 0x%lx, map_size 0x%lx",
saved_errno, strerror(saved_errno), va_to_map, m_map_offset, m_map_size);
}
m_vaddr = reinterpret_cast<uint64_t>(va);

if (req->map_align && m_vaddr > resv_vaddr)
if (!mmap_target && req->map_align && m_vaddr > resv_vaddr)
::munmap(resv_va, static_cast<size_t>(m_vaddr - resv_vaddr));
if (resv_vaddr + resv_size > m_vaddr + m_map_size)
if (!mmap_target && resv_vaddr + resv_size > m_vaddr + m_map_size)
munmap(reinterpret_cast<void *>(m_vaddr + m_map_size),
static_cast<size_t>(resv_vaddr + resv_size - m_vaddr - m_map_size));
vxdna_dbg("Created BO with resource: type=%u, res_id=%u", req->bo_type, req->res_id);
vxdna_dbg("Created BO with resource: type=%u, res_id=%u, vaddr=0x%lx",
req->bo_type, req->res_id, m_vaddr);
}

vxdna_bo::
Expand Down Expand Up @@ -420,12 +428,50 @@ void
vxdna_context::
create_bo(const struct amdxdna_ccmd_create_bo_req *req)
{
if (m_heap_destroyed &&
(req->bo_type == AMDXDNA_BO_DEV || req->bo_type == AMDXDNA_BO_DEV_HEAP))
VACCEL_THROW_MSG(-EINVAL, "Heap destroyed, cannot allocate type %u", req->bo_type);

std::shared_ptr<vxdna_bo> xdna_bo;
if (req->bo_type != AMDXDNA_BO_DEV) {
auto res = get_device().get_resource(req->res_id);
if (!res)
VACCEL_THROW_MSG(-EINVAL, "Res: %u not found", req->res_id);
xdna_bo = std::make_shared<vxdna_bo>(res, get_fd(), req);

void *mmap_target = nullptr;
if (req->bo_type == AMDXDNA_BO_DEV_HEAP) {
if (!m_heap_base) {
size_t align = req->map_align ? req->map_align : 1;
size_t resv_sz = HEAP_MAX_SIZE + align;
void *p = ::mmap(0, resv_sz, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED) {
VACCEL_THROW_MSG(-ENOMEM,
"Failed to reserve heap VA range (%zu bytes)",
resv_sz);
}
auto base = reinterpret_cast<uintptr_t>(p);
auto aligned = (base + align - 1) & ~(align - 1);

if (aligned > base)
::munmap(p, aligned - base);
size_t tail = (base + resv_sz) - (aligned + HEAP_MAX_SIZE);
if (tail > 0)
::munmap(reinterpret_cast<void *>(aligned + HEAP_MAX_SIZE), tail);

m_heap_base = reinterpret_cast<void *>(aligned);
vxdna_dbg("Reserved heap VA range: base=%p, size=0x%zx, align=0x%zx",
m_heap_base, HEAP_MAX_SIZE, align);
}
if (!req->size || req->size > HEAP_MAX_SIZE - m_heap_cur_offset)
VACCEL_THROW_MSG(-ENOSPC,
"Heap expansion rejected: size 0x%lx, remaining 0x%zx",
(unsigned long)req->size,
HEAP_MAX_SIZE - m_heap_cur_offset);
mmap_target = static_cast<char *>(m_heap_base) + m_heap_cur_offset;
}

xdna_bo = std::make_shared<vxdna_bo>(res, get_fd(), req, mmap_target);
} else {
xdna_bo = std::make_shared<vxdna_bo>(get_fd(), req);
}
Expand All @@ -440,6 +486,9 @@ create_bo(const struct amdxdna_ccmd_create_bo_req *req)
VACCEL_THROW_MSG(-EINVAL, "Resp resource not found for context %u", get_id());
(void)resp_res->write(req->hdr.rsp_off, &rsp, sizeof(rsp));
add_bo(std::move(xdna_bo));

if (req->bo_type == AMDXDNA_BO_DEV_HEAP)
m_heap_cur_offset += req->size;
vxdna_dbg("Created bo: handle=%u, xdna_addr=%lu", rsp.handle, rsp.xdna_addr);
}

Expand All @@ -454,6 +503,9 @@ void
vxdna_context::
remove_bo(uint32_t handle)
{
  // Removing the heap BO latches m_heap_destroyed, which create_bo()
  // checks to reject later DEV / DEV_HEAP allocations.
  auto victim = m_bo_table.lookup(handle);
  const bool is_heap_bo = victim && victim->get_type() == AMDXDNA_BO_DEV_HEAP;
  if (is_heap_bo)
    m_heap_destroyed = true;

  vxdna_dbg("Removing bo: handle=%u", handle);
  m_bo_table.erase(handle);
}
Expand Down Expand Up @@ -674,8 +726,11 @@ vxdna_context::
write_err_rsp(int err)
{
auto resp_res = get_resp_res();
if (!resp_res)
VACCEL_THROW_MSG(-EINVAL, "Resp resource not found for context %u", get_id());
if (!resp_res) {
vxdna_err("write_err_rsp: no resp resource for ctx %u, err %d dropped",
get_id(), err);
return;
}
struct amdxdna_ccmd_rsp rsp = {};
rsp.ret = err;
rsp.base.len = sizeof(rsp);
Expand Down
Loading