diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index b34f4047..c609aa37 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -101,9 +101,111 @@ static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwct
 #endif
 }
 
+/*
+ * Drop the pin and the reference this context took on each heap chunk,
+ * walking the client chunk list from its head through last_heap_chunk_added.
+ * Caller must hold client->mm_lock.
+ */
+static void aie2_hwctx_release_heap(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_client *client = hwctx->client;
+	struct amdxdna_hwctx_priv *priv = hwctx->priv;
+	struct amdxdna_gem_obj *chunk;
+
+	if (!priv->last_heap_chunk_added)
+		return;
+
+	drm_WARN_ON(&client->xdna->ddev, !mutex_is_locked(&client->mm_lock));
+	list_for_each_entry(chunk, &client->dev_heap_chunks, heap_chunk_node) {
+		amdxdna_gem_unpin(chunk);
+		drm_gem_object_put(to_gobj(chunk));
+		if (chunk == priv->last_heap_chunk_added)
+			break;
+	}
+	priv->last_heap_chunk_added = NULL;
+}
+
+/*
+ * Pin, reference and report the client's heap chunks to firmware.
+ *
+ * Chunks up to and including last_heap_chunk_added are already pinned and
+ * referenced by this context; chunks after it are pinned and referenced here.
+ * With @map_all every chunk is sent to firmware, otherwise only the newly
+ * pinned ones. The chunk at offset 0 is sent with aie2_map_host_buf(), all
+ * others with aie2_add_host_buf().
+ * Caller must hold xdna->dev_lock and client->mm_lock.
+ *
+ * Return: 0 on success or the error from pinning or notifying firmware.
+ */
+static int aie2_hwctx_map_heap(struct amdxdna_hwctx *hwctx, bool map_all)
+{
+	struct amdxdna_client *client = hwctx->client;
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_gem_obj *last = hwctx->priv->last_heap_chunk_added;
+	struct amdxdna_gem_obj *chunk;
+	bool need_ref = !last;
+	u64 offset = 0;
+	u64 addr;
+	int ret;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&client->mm_lock));
+
+	list_for_each_entry(chunk, &client->dev_heap_chunks, heap_chunk_node) {
+		if (!need_ref && chunk == list_next_entry(last, heap_chunk_node))
+			need_ref = true;
+
+		if (need_ref) {
+			ret = amdxdna_gem_pin(chunk);
+			if (ret) {
+				XDNA_ERR(xdna, "Pin chunk for hwctx %d failed, ret %d",
+					 hwctx->fw_ctx_id, ret);
+				return ret;
+			}
+			drm_gem_object_get(to_gobj(chunk));
+		}
+
+		if (map_all || need_ref) {
+			addr = amdxdna_obj_dma_addr(chunk);
+
+			if (!offset)
+				ret = aie2_map_host_buf(xdna->dev_handle,
+							hwctx->fw_ctx_id,
+							addr, chunk->mem.size);
+			else
+				ret = aie2_add_host_buf(xdna->dev_handle,
+							hwctx->fw_ctx_id,
+							addr, chunk->mem.size);
+			if (ret) {
+				XDNA_ERR(xdna,
+					 "Notify FW hwctx %d chunk offset 0x%llx failed, ret %d",
+					 hwctx->fw_ctx_id, offset, ret);
+				if (need_ref) {
+					amdxdna_gem_unpin(chunk);
+					drm_gem_object_put(to_gobj(chunk));
+				}
+				return ret;
+			}
+		}
+
+		offset += chunk->mem.size;
+		/*
+		 * Advance the marker only for chunks pinned by this call.
+		 * While re-sending already pinned chunks (need_ref is false)
+		 * the marker must keep pointing at the last pinned chunk,
+		 * otherwise a failure part way through would leave it too
+		 * early and aie2_hwctx_release_heap() would skip later chunks.
+		 */
+		if (need_ref)
+			hwctx->priv->last_heap_chunk_added = chunk;
+	}
+
+	return 0;
+}
+
 static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
 {
-	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+	struct amdxdna_client *client = hwctx->client;
 	int ret;
 
 	ret = aie2_create_context(xdna->dev_handle, hwctx);
@@ -112,9 +214,9 @@ static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hw
 		goto out;
 	}
 
-	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
-				amdxdna_obj_dma_addr(heap),
-				heap->mem.size);
+	mutex_lock(&client->mm_lock);
+	ret = aie2_hwctx_map_heap(hwctx, true);
+	mutex_unlock(&client->mm_lock);
 	if (ret) {
 		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
 		goto out;
@@ -690,7 +792,6 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 #endif
 	struct drm_gpu_scheduler *sched;
 	struct amdxdna_hwctx_priv *priv;
-	struct amdxdna_gem_obj *heap;
 	int i, ret;
 
 	priv = kzalloc_obj(*hwctx->priv);
@@ -698,25 +799,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 		return -ENOMEM;
 	hwctx->priv = priv;
 
-	mutex_lock(&client->mm_lock);
-	heap = client->dev_heap;
-	if (!heap) {
-		XDNA_ERR(xdna, "The client dev heap object not exist");
-		mutex_unlock(&client->mm_lock);
-		ret = -ENOENT;
-		goto free_priv;
-	}
-	drm_gem_object_get(to_gobj(heap));
-	mutex_unlock(&client->mm_lock);
-	priv->heap = heap;
 	sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
 
-	ret = amdxdna_gem_pin(heap);
-	if (ret) {
-		XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
-		goto put_heap;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
 		struct amdxdna_gem_obj *abo;
 		struct amdxdna_drm_create_bo args = {
@@ -778,18 +862,25 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 		goto suspend_put;
 	}
 
-	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
-				amdxdna_obj_dma_addr(heap),
-				heap->mem.size);
+	mutex_lock(&client->mm_lock);
+	if (list_empty(&client->dev_heap_chunks)) {
+		XDNA_ERR(xdna, "The client dev heap doesn't exist");
+		mutex_unlock(&client->mm_lock);
+		ret = -ENOENT;
+		goto release_resource;
+	}
+
+	ret = aie2_hwctx_map_heap(hwctx, true);
+	mutex_unlock(&client->mm_lock);
 	if (ret) {
 		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
-		goto release_resource;
+		goto release_heap_chunks;
 	}
 
 	ret = aie2_ctx_syncobj_create(hwctx);
 	if (ret) {
 		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
-		goto release_resource;
+		goto release_heap_chunks;
 	}
 
 	amdxdna_pm_suspend_put(xdna);
@@ -799,6 +890,10 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 
 	return 0;
 
+release_heap_chunks:
+	mutex_lock(&client->mm_lock);
+	aie2_hwctx_release_heap(hwctx);
+	mutex_unlock(&client->mm_lock);
 release_resource:
 	aie2_release_resource(hwctx);
 suspend_put:
@@ -815,10 +910,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
 			continue;
 		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
 	}
-	amdxdna_gem_unpin(heap);
-put_heap:
-	drm_gem_object_put(to_gobj(heap));
-free_priv:
+
 	kfree(priv);
 	return ret;
 }
@@ -858,8 +950,10 @@ void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
 
 	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
 		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
-	amdxdna_gem_unpin(hwctx->priv->heap);
-	drm_gem_object_put(to_gobj(hwctx->priv->heap));
+
+	mutex_lock(&hwctx->client->mm_lock);
+	aie2_hwctx_release_heap(hwctx);
+	mutex_unlock(&hwctx->client->mm_lock);
 
 	mutex_destroy(&hwctx->priv->io_lock);
 	kfree(hwctx->col_list);
@@ -1211,6 +1305,15 @@ int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
 	return ret;
 }
 
+/*
+ * Report heap chunks this context has not pinned yet to firmware.
+ * Locking requirements are those of aie2_hwctx_map_heap().
+ */
+int aie2_hwctx_heap_change(struct amdxdna_hwctx *hwctx)
+{
+	return aie2_hwctx_map_heap(hwctx, false);
+}
+
 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
 			 unsigned long cur_seq)
 {
diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
index 415c38a7..8603db7b 100644
--- a/drivers/accel/amdxdna/aie2_message.c
+++ b/drivers/accel/amdxdna/aie2_message.c
@@ -336,25 +336,56 @@ int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwc
 	return ret;
 }
 
-int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+static int aie2_send_host_buf_msgs(struct amdxdna_dev_hdl *ndev, u32 context_id,
+				   u64 addr, u64 size, u32 initial_opcode)
 {
 	DECLARE_AIE_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
 	struct amdxdna_dev *xdna = ndev->aie.xdna;
+	size_t chunk_size;
 	int ret;
 
-	req.context_id = context_id;
-	req.buf_addr = addr;
-	req.buf_size = size;
-	ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
-	if (ret)
-		return ret;
+	chunk_size = xdna->dev_info->dev_mem_size;
+	if (!size || !IS_ALIGNED(size, chunk_size)) {
+		XDNA_ERR(xdna, "Invalid size 0x%llx for chunk 0x%lx",
+			 size, chunk_size);
+		return -EINVAL;
+	}
+
+	msg.opcode = initial_opcode;
+	do {
+		req.context_id = context_id;
+		req.buf_addr = addr;
+		req.buf_size = chunk_size;
+		ret = aie_send_mgmt_msg_wait(&ndev->aie, &msg);
+		if (ret) {
+			XDNA_ERR(xdna, "fw ctx %d addr 0x%llx size 0x%lx",
+				 context_id, addr, chunk_size);
+			return ret;
+		}
+
+		XDNA_DBG(xdna, "fw ctx %d host buf op 0x%x addr 0x%llx size 0x%lx",
+			 context_id, msg.opcode, addr, chunk_size);
 
-	XDNA_DBG(xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
-		 context_id, addr, size);
+		addr += chunk_size;
+		size -= chunk_size;
+		msg.opcode = MSG_OP_ADD_HOST_BUFFER;
+	} while (size);
 
 	return 0;
 }
 
+int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+{
+	return aie2_send_host_buf_msgs(ndev, context_id, addr, size,
+				       MSG_OP_MAP_HOST_BUFFER);
+}
+
+int aie2_add_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
+{
+	return aie2_send_host_buf_msgs(ndev, context_id, addr, size,
+				       MSG_OP_ADD_HOST_BUFFER);
+}
+
 static int amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx, void *arg)
 {
 	u32 *bitmap = arg;
@@ -1028,7 +1059,6 @@ int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
 	if (msg.opcode == MSG_OP_MAX_OPCODE)
 		return -EOPNOTSUPP;
 
-	/* The offset is the accumulated total size of the cmd buffer */
 	EXEC_MSG_OPS(xdna)->init_chain_req(&req, amdxdna_gem_dev_addr(cmdbuf_abo),
 					   offset, ccnt);
 	drm_clflush_virt_range(cmd_buf, offset);
@@ -1088,7 +1118,7 @@ int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
 						    &req, msg.send_size, false);
 	ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
 	if (ret) {
-		XDNA_ERR(hwctx->client->xdna, "Send message failed");
+		XDNA_ERR(xdna, "Send message failed");
 		return ret;
 	}
 
diff --git a/drivers/accel/amdxdna/aie2_msg_priv.h b/drivers/accel/amdxdna/aie2_msg_priv.h
index bcc7e300..8c72d079 100644
--- a/drivers/accel/amdxdna/aie2_msg_priv.h
+++ b/drivers/accel/amdxdna/aie2_msg_priv.h
@@ -33,6 +33,7 @@ enum aie2_msg_opcode {
 	MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
 	MSG_OP_UPDATE_PROPERTY             = 0x113,
 	MSG_OP_GET_APP_HEALTH              = 0x114,
+	MSG_OP_ADD_HOST_BUFFER             = 0x115,
 	MSG_OP_GET_DEV_REVISION            = 0x117,
 	MSG_OP_MAX_DRV_OPCODE,
 	MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
index e36be39e..70cdeb45 100644
--- a/drivers/accel/amdxdna/aie2_pci.c
+++ b/drivers/accel/amdxdna/aie2_pci.c
@@ -1303,4 +1303,5 @@ const struct amdxdna_dev_ops aie2_ops = {
 	.hmm_invalidate = aie2_hmm_invalidate,
 	.get_array = aie2_get_array,
 	.get_dev_revision = aie2_get_dev_rev,
+	.hwctx_heap_change = aie2_hwctx_heap_change,
 };
diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
index 0a1013f4..6e6beeac 100644
--- a/drivers/accel/amdxdna/aie2_pci.h
+++ b/drivers/accel/amdxdna/aie2_pci.h
@@ -18,8 +18,9 @@
 #include "amdxdna_mailbox.h"
 
 /* Firmware determines device memory base address and size */
-#define AIE2_DEVM_BASE	0x4000000
-#define AIE2_DEVM_SIZE	SZ_64M
+#define AIE2_DEVM_BASE		0x4000000
+#define AIE2_DEVM_SIZE		SZ_64M
+#define AIE2_DEVM_MAX_SIZE	SZ_512M
 
 #define NDEV2PDEV(ndev) (to_pci_dev((ndev)->aie.xdna->ddev.dev))
 
@@ -129,7 +130,7 @@ struct dpm_clk_freq {
 #define HWCTX_MAX_CMDS		4
 #define get_job_idx(seq)	((seq) & (HWCTX_MAX_CMDS - 1))
 struct amdxdna_hwctx_priv {
-	struct amdxdna_gem_obj		*heap;
+	struct amdxdna_gem_obj		*last_heap_chunk_added;
 	void				*mbox_chann;
 
 	struct drm_gpu_scheduler	sched;
@@ -234,6 +235,7 @@ enum aie2_fw_feature {
 	AIE2_PREEMPT,
 	AIE2_TEMPORAL_ONLY,
 	AIE2_APP_HEALTH,
+	AIE2_ADD_HOST_BUFFER,
 	AIE2_UPDATE_PROPERTY,
 	AIE2_GET_DEV_REVISION,
 	AIE2_FEATURE_MAX
@@ -304,6 +306,7 @@ int aie2_get_dev_revision(struct amdxdna_dev_hdl *ndev, enum aie2_dev_revision *
 int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
 int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
 int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
+int aie2_add_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
 int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
 int aie2_query_telemetry(struct amdxdna_dev_hdl *ndev,
 			 char __user *buf, u32 size,
@@ -338,6 +341,7 @@ int aie2_hwctx_sync_debug_bo(struct amdxdna_hwctx *hwctx, u32 debug_bo_hdl);
 void aie2_hwctx_suspend(struct amdxdna_client *client);
 int aie2_hwctx_resume(struct amdxdna_client *client);
 int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
+int aie2_hwctx_heap_change(struct amdxdna_hwctx *hwctx);
 void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
 
 /* TDR APIs */
diff --git a/drivers/accel/amdxdna/amdxdna_gem.c b/drivers/accel/amdxdna/amdxdna_gem.c
index 560739d6..9224c205 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.c
+++ b/drivers/accel/amdxdna/amdxdna_gem.c
@@ -19,6 +19,7 @@
 #include "amdxdna_ctx.h"
 #include "amdxdna_gem.h"
 #include "amdxdna_pci_drv.h"
+#include "amdxdna_pm.h"
 #include "amdxdna_ubuf.h"
 
 #ifdef HAVE_6_13_MODULE_IMPORT_NS
@@ -51,19 +52,36 @@ amdxdna_gem_heap_alloc(struct amdxdna_gem_obj *abo)
 		goto unlock_out;
 	}
 
-	if (mem->size == 0 || mem->size > heap->mem.size) {
-		XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, limit 0x%lx",
-			 mem->size, heap->mem.size);
+	if (!mem->size || mem->size > xdna->dev_info->dev_heap_max_size) {
+		XDNA_ERR(xdna, "Invalid dev bo size 0x%lx, max heap 0x%lx",
+			 mem->size, xdna->dev_info->dev_heap_max_size);
 		ret = -EINVAL;
 		goto unlock_out;
 	}
 
 	align = 1 << max(PAGE_SHIFT, xdna->dev_info->dev_mem_buf_shift);
-	ret = drm_mm_insert_node_generic(&heap->mm, &abo->mm_node,
-					 mem->size, align,
-					 0, DRM_MM_INSERT_BEST);
+	ret = drm_mm_insert_node_in_range(&heap->mm, &abo->mm_node,
+					  mem->size, align, 0,
+					  xdna->dev_info->dev_mem_base,
+					  xdna->dev_info->dev_mem_base +
+					  client->total_heap_size,
+					  DRM_MM_INSERT_BEST);
 	if (ret) {
-		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+		if (client->total_heap_size < xdna->dev_info->dev_heap_max_size) {
+			/*
+			 * Heap can still grow. Return -EAGAIN so userspace
+			 * shim expands the heap and retries the allocation.
+			 * Use XDNA_INFO intentionally — heap expansion is
+			 * infrequent and worth logging for diagnostics.
+			 */
+			XDNA_INFO(xdna, "No space in committed heap 0x%lx for bo 0x%lx",
+				  client->total_heap_size, mem->size);
+			ret = -EAGAIN;
+		} else {
+			XDNA_ERR(xdna, "Failed to alloc dev bo 0x%lx, heap at max 0x%lx",
+				 mem->size, client->total_heap_size);
+			ret = -ENOSPC;
+		}
 		goto unlock_out;
 	}
 
@@ -112,6 +130,7 @@ amdxdna_gem_create_obj(struct drm_device *dev, size_t size)
 	abo->open_ref = 0;
 	abo->internal = false;
 	INIT_LIST_HEAD(&abo->mem.umap_list);
+	INIT_LIST_HEAD(&abo->heap_chunk_node);
 
 	return abo;
 }
@@ -575,7 +594,14 @@ static void amdxdna_gem_obj_free(struct drm_gem_object *gobj)
 	if (abo->pinned)
 		amdxdna_gem_unpin(abo);
 
-	if (abo->type == AMDXDNA_BO_DEV_HEAP)
+	/*
+	 * DEV_HEAP chunks are always removed from client->dev_heap_chunks
+	 * by amdxdna_client_cleanup() before this free callback runs.
+	 * DRM core calls drm_gem_release() before postclose, so abo->client
+	 * is already NULL here. No list removal needed.
+	 */
+	if (abo->type == AMDXDNA_BO_DEV_HEAP &&
+	    drm_mm_initialized(&abo->mm))
 		drm_mm_takedown(&abo->mm);
 
 	if (amdxdna_iova_on(xdna))
@@ -627,16 +653,46 @@ static void amdxdna_gem_obj_close(struct drm_gem_object *gobj, struct drm_file *
 	}
 }
 
+/*
+ * Map a DEV BO for CPU access using the containing chunk's kva.
+ * The chunk is lazily vmapped on first access via amdxdna_gem_vmap().
+ * The BO must fit entirely within a single chunk.
+ */
 static int amdxdna_gem_dev_obj_vmap(struct drm_gem_object *obj, struct iosys_map *map)
 {
 	struct amdxdna_gem_obj *abo = to_xdna_obj(obj);
-	void *base = amdxdna_gem_vmap(abo->client->dev_heap);
-	u64 offset = amdxdna_dev_bo_offset(abo);
+	struct amdxdna_client *client = abo->client;
+	u64 dev_base = client->xdna->dev_info->dev_mem_base;
+	u64 bo_start = abo->mm_node.start - dev_base;
+	u64 bo_end = bo_start + abo->mm_node.size;
+	struct amdxdna_gem_obj *chunk;
+	u64 chunk_start = 0;
 
-	if (!base)
-		return -ENOMEM;
-	iosys_map_set_vaddr(map, base + offset);
-	return 0;
+	mutex_lock(&client->mm_lock);
+	list_for_each_entry(chunk, &client->dev_heap_chunks, heap_chunk_node) {
+		u64 chunk_end = chunk_start + chunk->mem.size;
+
+		if (bo_start >= chunk_start && bo_end <= chunk_end) {
+			u64 offset = bo_start - chunk_start;
+			void *kva = chunk->mem.kva;
+
+			mutex_unlock(&client->mm_lock);
+			if (!kva) {
+				kva = amdxdna_gem_vmap(chunk);
+				if (!kva)
+					return -ENOMEM;
+			}
+			iosys_map_set_vaddr(map, kva + offset);
+			return 0;
+		}
+		chunk_start = chunk_end;
+	}
+	mutex_unlock(&client->mm_lock);
+
+	drm_WARN(&client->xdna->ddev, 1,
+		 "DEV BO [0x%llx, 0x%llx) not contained in a single heap chunk",
+		 bo_start, bo_end);
+	return -EINVAL;
 }
 
 static const struct drm_gem_object_funcs amdxdna_gem_dev_obj_funcs = {
@@ -754,6 +810,7 @@ amdxdna_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
 	abo->attach = attach;
 	abo->dma_buf = dma_buf;
 	abo->type = AMDXDNA_BO_SHARE;
+	gobj->import_attach = attach;
 	gobj->resv = dma_buf->resv;
 
 	return gobj;
@@ -792,6 +849,43 @@ amdxdna_drm_create_share_bo(struct drm_device *dev,
 	return abo;
 }
 
+/*
+ * Report heap chunks that the client's hardware contexts have not mapped yet
+ * to firmware by calling the hwctx_heap_change op on every context.
+ * dev_lock and mm_lock are both held while firmware is notified.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when the device has no hwctx_heap_change
+ * op, otherwise the first error from resuming the device or from the op.
+ */
+static int
+amdxdna_drm_dev_heap_notify(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx *hwctx;
+	unsigned long hwctx_id;
+	int ret;
+
+	if (!xdna->dev_info->ops->hwctx_heap_change) {
+		XDNA_ERR(xdna, "Heap expansion not supported");
+		return -EOPNOTSUPP;
+	}
+
+	guard(mutex)(&xdna->dev_lock);
+	guard(mutex)(&client->mm_lock);
+	ret = amdxdna_pm_resume_get_locked(xdna);
+	if (ret)
+		return ret;
+
+	amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
+		ret = xdna->dev_info->ops->hwctx_heap_change(hwctx);
+		if (ret)
+			break;
+	}
+	amdxdna_pm_suspend_put(xdna);
+
+	return ret;
+}
+
 static struct amdxdna_gem_obj *
 amdxdna_drm_create_dev_heap_bo(struct drm_device *dev,
 			       struct amdxdna_drm_create_bo *args, struct drm_file *filp)
@@ -799,42 +893,53 @@ amdxdna_drm_create_dev_heap_bo(struct drm_device *dev,
 	struct amdxdna_client *client = filp->driver_priv;
 	struct amdxdna_dev *xdna = to_xdna_dev(dev);
 	struct amdxdna_gem_obj *abo;
-	int ret;
 
 	WARN_ON(!is_power_of_2(xdna->dev_info->dev_mem_size));
 	XDNA_DBG(xdna, "Requested dev heap size 0x%llx", args->size);
-	if (!args->size || !IS_ALIGNED(args->size, xdna->dev_info->dev_mem_size)) {
-		XDNA_ERR(xdna, "The dev heap size 0x%llx is not multiple of 0x%lx",
-			 args->size, xdna->dev_info->dev_mem_size);
+	if (!args->size || !IS_ALIGNED(args->size, xdna->dev_info->dev_mem_size) ||
+	    args->size > xdna->dev_info->dev_heap_max_size) {
+		XDNA_ERR(xdna, "Invalid dev heap size 0x%llx, chunk 0x%lx, max 0x%lx",
+			 args->size, xdna->dev_info->dev_mem_size,
+			 xdna->dev_info->dev_heap_max_size);
 		return ERR_PTR(-EINVAL);
 	}
 
 	/* HEAP BO is a special case of SHARE BO. */
 	abo = amdxdna_drm_create_share_bo(dev, args, filp);
 	if (IS_ERR(abo))
 		return ERR_CAST(abo);
 
 	/* Set up heap for this client. */
 	mutex_lock(&client->mm_lock);
 
-	if (client->dev_heap) {
-		XDNA_DBG(client->xdna, "dev heap is already created");
-		ret = -EBUSY;
-		goto mm_unlock;
+	/*
+	 * Each request is capped above, but the sum of all chunks must also
+	 * stay within the range managed by the heap's drm_mm.
+	 */
+	if (abo->mem.size > xdna->dev_info->dev_heap_max_size - client->total_heap_size) {
+		XDNA_ERR(xdna, "Dev heap 0x%lx plus chunk 0x%lx exceeds max 0x%lx",
+			 client->total_heap_size, abo->mem.size,
+			 xdna->dev_info->dev_heap_max_size);
+		mutex_unlock(&client->mm_lock);
+		drm_gem_object_put(to_gobj(abo));
+		return ERR_PTR(-ENOSPC);
+	}
+
+	if (!client->dev_heap) {
+		client->dev_heap = abo;
+		drm_mm_init(&abo->mm, xdna->dev_info->dev_mem_base,
+			    xdna->dev_info->dev_heap_max_size);
 	}
 
-	client->dev_heap = abo;
-	drm_gem_object_get(to_gobj(abo));
-	drm_mm_init(&abo->mm, xdna->dev_info->dev_mem_base, abo->mem.size);
+	drm_gem_object_get(to_gobj(abo)); /* get ref for the chunk */
+	list_add_tail(&abo->heap_chunk_node, &client->dev_heap_chunks);
+	client->total_heap_size += abo->mem.size;
 
 	mutex_unlock(&client->mm_lock);
 
+	XDNA_DBG(xdna, "Dev heap chunk created, size 0x%lx", abo->mem.size);
+
 	return abo;
-
-mm_unlock:
-	mutex_unlock(&client->mm_lock);
-	drm_gem_object_put(to_gobj(abo));
-	return ERR_PTR(ret);
 }
 
 struct amdxdna_gem_obj *
@@ -868,7 +973,8 @@ amdxdna_drm_create_dev_bo(struct drm_device *dev,
 
 	ret = amdxdna_gem_heap_alloc(abo);
 	if (ret) {
-		XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
+		if (ret != -EAGAIN)
+			XDNA_ERR(xdna, "Failed to alloc dev bo memory, ret %d", ret);
 		amdxdna_gem_destroy_obj(abo);
 		return ERR_PTR(ret);
 	}
@@ -900,6 +1006,18 @@ int amdxdna_drm_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_f
 		break;
 	case AMDXDNA_BO_DEV:
 		abo = amdxdna_drm_create_dev_bo(dev, args, filp);
+		if (!IS_ERR(abo)) {
+			/*
+			 * Notify firmware about the dev heap change
+			 * when dev bo is created, the dev heap should be mapped.
+			 */
+			ret = amdxdna_drm_dev_heap_notify(filp->driver_priv);
+			if (ret) {
+				XDNA_ERR(xdna, "Notify dev heap change failed, ret %d", ret);
+				drm_gem_object_put(to_gobj(abo));
+				abo = ERR_PTR(ret);
+			}
+		}
 		break;
 	default:
 		return -EINVAL;
@@ -929,7 +1047,7 @@ int amdxdna_gem_pin_nolock(struct amdxdna_gem_obj *abo)
 	int ret;
 
 	if (abo->type == AMDXDNA_BO_DEV)
-		abo = abo->client->dev_heap;
+		return 0; /* Heap chunks are pinned when mapped to a hwctx */
 
 	if (is_import_bo(abo))
 		return 0;
@@ -954,7 +1072,7 @@ int amdxdna_gem_pin(struct amdxdna_gem_obj *abo)
 void amdxdna_gem_unpin(struct amdxdna_gem_obj *abo)
 {
 	if (abo->type == AMDXDNA_BO_DEV)
-		abo = abo->client->dev_heap;
+		return; /* Heap chunks are unpinned when a hwctx releases them */
 
 	if (is_import_bo(abo))
 		return;
@@ -1017,6 +1135,35 @@ int amdxdna_drm_get_bo_info_ioctl(struct drm_device *dev, void *data, struct drm
 	return ret;
 }
 
+static void amdxdna_gem_dev_bo_clflush(struct amdxdna_gem_obj *abo,
+				       u64 offset, size_t len)
+{
+	struct amdxdna_client *client = abo->client;
+	u64 dev_base = client->xdna->dev_info->dev_mem_base;
+	u64 flush_start = abo->mm_node.start - dev_base + offset;
+	u64 flush_end = flush_start + len;
+	struct amdxdna_gem_obj *chunk;
+	u64 chunk_start = 0;
+
+	mutex_lock(&client->mm_lock);
+	list_for_each_entry(chunk, &client->dev_heap_chunks, heap_chunk_node) {
+		u64 chunk_end = chunk_start + chunk->mem.size;
+
+		if (chunk_start >= flush_end)
+			break;
+
+		if (chunk_end > flush_start && chunk->mem.kva) {
+			u64 ov_start = max(flush_start, chunk_start);
+			u64 ov_end = min(flush_end, chunk_end);
+
+			drm_clflush_virt_range(chunk->mem.kva + ov_start - chunk_start,
+					       ov_end - ov_start);
+		}
+		chunk_start = chunk_end;
+	}
+	mutex_unlock(&client->mm_lock);
+}
+
 /*
  * The sync bo ioctl is to make sure the CPU cache is in sync with memory.
  * This is required because NPU is not cache coherent device. CPU cache
@@ -1046,10 +1193,10 @@ int amdxdna_drm_sync_bo_ioctl(struct drm_device *dev,
 		goto put_obj;
 	}
 
-	if (is_import_bo(abo))
+	if (abo->type == AMDXDNA_BO_DEV)
+		amdxdna_gem_dev_bo_clflush(abo, args->offset, args->size);
+	else if (is_import_bo(abo))
 		drm_clflush_sg(abo->base.sgt);
-	else if (amdxdna_gem_vmap(abo))
-		drm_clflush_virt_range(amdxdna_gem_vmap(abo) + args->offset, args->size);
 	else if (abo->base.pages)
 		drm_clflush_pages(abo->base.pages, gobj->size >> PAGE_SHIFT);
 	else
diff --git a/drivers/accel/amdxdna/amdxdna_gem.h b/drivers/accel/amdxdna/amdxdna_gem.h
index 6e2438f0..496d9065 100644
--- a/drivers/accel/amdxdna/amdxdna_gem.h
+++ b/drivers/accel/amdxdna/amdxdna_gem.h
@@ -46,7 +46,8 @@ struct amdxdna_gem_obj {
 	int				open_ref;
 
 	/* Below members are initialized when needed */
-	struct drm_mm			mm; /* For AMDXDNA_BO_DEV_HEAP */
+	struct drm_mm			mm; /* For first AMDXDNA_BO_DEV_HEAP */
+	struct list_head		heap_chunk_node; /* Link in client chunk list */
 	struct drm_mm_node		mm_node; /* For AMDXDNA_BO_DEV */
 	u32				assigned_hwctx;
 	struct dma_buf			*dma_buf;
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
index 611e906e..a52b3abc 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
@@ -104,6 +104,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
 	init_srcu_struct(&client->hwctx_srcu);
 	xa_init_flags(&client->hwctx_xa, XA_FLAGS_ALLOC);
 	mutex_init(&client->mm_lock);
+	INIT_LIST_HEAD(&client->dev_heap_chunks);
 
 	mutex_lock(&xdna->dev_lock);
 	list_add_tail(&client->node, &xdna->client_list);
@@ -131,8 +132,14 @@ static void amdxdna_client_cleanup(struct amdxdna_client *client)
 	xa_destroy(&client->hwctx_xa);
 	cleanup_srcu_struct(&client->hwctx_srcu);
 
-	if (client->dev_heap)
-		drm_gem_object_put(to_gobj(client->dev_heap));
+	while (!list_empty(&client->dev_heap_chunks)) {
+		struct amdxdna_gem_obj *chunk;
+
+		chunk = list_last_entry(&client->dev_heap_chunks,
+					struct amdxdna_gem_obj, heap_chunk_node);
+		list_del_init(&chunk->heap_chunk_node);
+		drm_gem_object_put(to_gobj(chunk)); /* drop chunk list ref */
+	}
 
 	mutex_destroy(&client->mm_lock);
 
diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/amdxdna/amdxdna_pci_drv.h
index e181ac25..c7ab9328 100644
--- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
+++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
@@ -67,6 +67,7 @@ struct amdxdna_dev_ops {
 	int (*set_aie_state)(struct amdxdna_client *client, struct amdxdna_drm_set_state *args);
 	int (*get_array)(struct amdxdna_client *client, struct amdxdna_drm_get_array *args);
 	int (*get_dev_revision)(struct amdxdna_dev *xdna, u32 *rev);
+	int (*hwctx_heap_change)(struct amdxdna_hwctx *hwctx);
 };
 
 struct amdxdna_fw_feature_tbl {
@@ -93,6 +94,7 @@ struct amdxdna_dev_info {
 	size_t				dev_mem_size;
 	const char			*default_vbnv;
 	const struct amdxdna_rev_vbnv	*rev_vbnv_tbl;
+	size_t				dev_heap_max_size;
 	const struct amdxdna_dev_priv	*dev_priv;
 	const struct amdxdna_fw_feature_tbl *fw_feature_tbl;
 	const struct amdxdna_dev_ops	*ops;
@@ -148,6 +150,8 @@ struct amdxdna_client {
 
 	struct mutex			mm_lock; /* protect memory related */
 	struct amdxdna_gem_obj		*dev_heap;
+	struct list_head		dev_heap_chunks;
+	size_t				total_heap_size;
 
 	struct iommu_sva		*sva;
 	int				pasid;
diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
index 1a989840..55d576cb 100644
--- a/drivers/accel/amdxdna/npu1_regs.c
+++ b/drivers/accel/amdxdna/npu1_regs.c
@@ -138,6 +138,7 @@ const struct amdxdna_dev_info dev_npu1_info = {
 	.dev_mem_base      = AIE2_DEVM_BASE,
 	.dev_mem_size      = AIE2_DEVM_SIZE,
 	.default_vbnv      = "RyzenAI-npu1",
+	.dev_heap_max_size = AIE2_DEVM_SIZE,
 	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
 	.dev_priv          = &npu1_dev_priv,
 	.fw_feature_tbl    = npu1_fw_feature_table,
diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
index bb6ff101..d14ae778 100644
--- a/drivers/accel/amdxdna/npu4_regs.c
+++ b/drivers/accel/amdxdna/npu4_regs.c
@@ -98,6 +98,7 @@ const struct amdxdna_fw_feature_tbl npu4_fw_feature_table[] = {
 	{ .features = BIT_U64(AIE2_NPU_COMMAND), .major = 6, .min_minor = 15 },
 	{ .features = BIT_U64(AIE2_UPDATE_PROPERTY), .major = 6, .min_minor = 15 },
 	{ .features = BIT_U64(AIE2_APP_HEALTH), .major = 6, .min_minor = 18 },
+	{ .features = BIT_U64(AIE2_ADD_HOST_BUFFER), .major = 6, .min_minor = 18 },
 	{ .features = BIT_U64(AIE2_GET_DEV_REVISION), .major = 6, .min_minor = 24 },
 	{ .features = AIE2_ALL_FEATURES, .major = 7 },
 	{ 0 }
@@ -203,6 +204,7 @@ const struct amdxdna_dev_info dev_npu4_info = {
 	.dev_mem_base      = AIE2_DEVM_BASE,
 	.dev_mem_size      = AIE2_DEVM_SIZE,
 	.default_vbnv      = "RyzenAI-npu4",
+	.dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
 	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
 	.rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
 	.dev_priv          = &npu4_dev_priv,
diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
index 00d21fd9..c957c06a 100644
--- a/drivers/accel/amdxdna/npu5_regs.c
+++ b/drivers/accel/amdxdna/npu5_regs.c
@@ -106,6 +106,7 @@ const struct amdxdna_dev_info dev_npu5_info = {
 	.dev_mem_base      = AIE2_DEVM_BASE,
 	.dev_mem_size      = AIE2_DEVM_SIZE,
 	.default_vbnv      = "RyzenAI-npu5",
+	.dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
 	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
 	.rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
 	.dev_priv          = &npu5_dev_priv,
diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
index 705ca8bc..af697072 100644
--- a/drivers/accel/amdxdna/npu6_regs.c
+++ b/drivers/accel/amdxdna/npu6_regs.c
@@ -107,6 +107,7 @@ const struct amdxdna_dev_info dev_npu6_info = {
 	.dev_mem_base      = AIE2_DEVM_BASE,
 	.dev_mem_size      = AIE2_DEVM_SIZE,
 	.default_vbnv      = "RyzenAI-npu6",
+	.dev_heap_max_size = AIE2_DEVM_MAX_SIZE,
 	.device_type       = AMDXDNA_DEV_TYPE_KMQ,
 	.rev_vbnv_tbl      = npu4_rev_vbnv_tbl,
 	.dev_priv          = &npu6_dev_priv,