diff --git a/include/API/CommandBuffer.h b/include/API/CommandBuffer.h new file mode 100644 index 000000000..5a49f6a0e --- /dev/null +++ b/include/API/CommandBuffer.h @@ -0,0 +1,44 @@ +//===- CommandBuffer.h - Offload Command Buffer API -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_COMMANDBUFFER_H +#define OFFLOADTEST_API_COMMANDBUFFER_H + +#include "API/API.h" + +#include + +namespace offloadtest { + +class CommandBuffer { + GPUAPI API; + +public: + explicit CommandBuffer(GPUAPI API) : API(API) {} + virtual ~CommandBuffer() = default; + CommandBuffer(const CommandBuffer &) = delete; + CommandBuffer &operator=(const CommandBuffer &) = delete; + + GPUAPI getAPI() const { return API; } + + template T &as() { + assert(API == T::BackendAPI && "CommandBuffer backend mismatch"); + return static_cast(*this); + } + template const T &as() const { + assert(API == T::BackendAPI && "CommandBuffer backend mismatch"); + return static_cast(*this); + } +}; + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_COMMANDBUFFER_H diff --git a/include/API/Device.h b/include/API/Device.h index 6cdfcba7b..7f9b316fb 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -16,8 +16,10 @@ #include "API/API.h" #include "API/Capabilities.h" +#include "API/CommandBuffer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Error.h" #include #include @@ -99,6 +101,12 @@ class Device { size_t SizeInBytes) = 0; virtual void printExtra(llvm::raw_ostream &OS) {} + virtual llvm::Expected> createCommandBuffer() { + return llvm::createStringError( + std::errc::not_supported, + "createCommandBuffer not implemented for this backend"); + } + virtual ~Device() = 0; llvm::StringRef getDescription() const { return Description; } diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 14046aa1e..0e82aa37d 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -389,6 +389,36 @@ class DXQueue : public offloadtest::Queue { } }; +class DXCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::DirectX; + + ComPtr Allocator; + ComPtr CmdList; + + static llvm::Expected> + create(ComPtr Device) { + auto CB = std::unique_ptr(new DXCommandBuffer()); + if (auto Err = HR::toError( + Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&CB->Allocator)), + "Failed to create command allocator.")) + return Err; + if (auto Err = HR::toError( + Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + CB->Allocator.Get(), nullptr, + IID_PPV_ARGS(&CB->CmdList)), + "Failed to create command list.")) + return Err; + return CB; + } + + ~DXCommandBuffer() override = default; + +private: + DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {} +}; + class DXDevice : public offloadtest::Device { private: ComPtr Adapter; @@ -420,8 +450,7 @@ class DXDevice : public offloadtest::Device { ComPtr RootSig; ComPtr DescHeap; ComPtr PSO; - ComPtr Allocator; - ComPtr CmdList; + std::unique_ptr CB; std::unique_ptr Fence; // Resources for graphics pipelines. @@ -683,19 +712,9 @@ class DXDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error createCommandStructures(InvocationState &IS) { - if (auto Err = HR::toError( - Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&IS.Allocator)), - "Failed to create command allocator.")) - return Err; - if (auto Err = HR::toError( - Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - IS.Allocator.Get(), nullptr, - IID_PPV_ARGS(&IS.CmdList)), - "Failed to create command list.")) - return Err; - return llvm::Error::success(); + llvm::Expected> + createCommandBuffer() override { + return DXCommandBuffer::create(Device); } void addResourceUploadCommands(Resource &R, InvocationState &IS, @@ -712,10 +731,10 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(Destination.Get(), 0); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(Source.Get(), Footprint); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); } else - IS.CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, - R.size()); + IS.CB->CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, + R.size()); addUploadEndBarrier(IS, Destination, R.isReadWrite()); } @@ -1182,7 +1201,7 @@ class DXDevice : public offloadtest::Device { {D3D12_RESOURCE_TRANSITION_BARRIER{ R.Get(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST}}}; - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addUploadEndBarrier(InvocationState &IS, ComPtr R, @@ -1195,21 +1214,21 @@ class DXDevice : public offloadtest::Device { D3D12_RESOURCE_STATE_COPY_DEST, IsUAV ? D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_GENERIC_READ}}}; - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addReadbackBeginBarrier(InvocationState &IS, ComPtr R) { const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addReadbackEndBarrier(InvocationState &IS, ComPtr R) { const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } llvm::Error waitForSignal(InvocationState &IS) { @@ -1231,11 +1250,11 @@ class DXDevice : public offloadtest::Device { } llvm::Error executeCommandList(InvocationState &IS) { - if (auto Err = - HR::toError(IS.CmdList->Close(), "Failed to close command list.")) + if (auto Err = HR::toError(IS.CB->CmdList->Close(), + "Failed to close command list.")) return Err; - ID3D12CommandList *const CmdLists[] = {IS.CmdList.Get()}; + ID3D12CommandList *const CmdLists[] = {IS.CB->CmdList.Get()}; GraphicsQueue.Queue->ExecuteCommandLists(1, CmdLists); return waitForSignal(IS); @@ -1245,11 +1264,11 @@ class DXDevice : public offloadtest::Device { CD3DX12_GPU_DESCRIPTOR_HANDLE Handle; if (IS.DescHeap) { ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()}; - IS.CmdList->SetDescriptorHeaps(1, Heaps); + IS.CB->CmdList->SetDescriptorHeaps(1, Heaps); Handle = IS.DescHeap->GetGPUDescriptorHandleForHeapStart(); } - IS.CmdList->SetComputeRootSignature(IS.RootSig.Get()); - IS.CmdList->SetPipelineState(IS.PSO.Get()); + IS.CB->CmdList->SetComputeRootSignature(IS.RootSig.Get()); + IS.CB->CmdList->SetPipelineState(IS.PSO.Get()); const uint32_t Inc = Device->GetDescriptorHandleIncrementSize( D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -1269,14 +1288,15 @@ class DXDevice : public offloadtest::Device { "Root constant cannot refer to resource arrays."); const uint32_t NumValues = Constant.BufferPtr->size() / sizeof(uint32_t); - IS.CmdList->SetComputeRoot32BitConstants( + IS.CB->CmdList->SetComputeRoot32BitConstants( RootParamIndex++, NumValues, Constant.BufferPtr->Data.back().get(), ConstantOffset); ConstantOffset += NumValues; break; } case dx::RootParamKind::DescriptorTable: - IS.CmdList->SetComputeRootDescriptorTable(RootParamIndex++, Handle); + IS.CB->CmdList->SetComputeRootDescriptorTable(RootParamIndex++, + Handle); Handle.Offset(P.Sets[DescriptorTableIndex++].Resources.size(), Inc); break; case dx::RootParamKind::RootDescriptor: @@ -1287,17 +1307,17 @@ class DXDevice : public offloadtest::Device { "Root descriptor cannot refer to resource arrays."); switch (getDXKind(RootDescIt->first->Kind)) { case SRV: - IS.CmdList->SetComputeRootShaderResourceView( + IS.CB->CmdList->SetComputeRootShaderResourceView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; case UAV: - IS.CmdList->SetComputeRootUnorderedAccessView( + IS.CB->CmdList->SetComputeRootUnorderedAccessView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; case CBV: - IS.CmdList->SetComputeRootConstantBufferView( + IS.CB->CmdList->SetComputeRootConstantBufferView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; @@ -1313,7 +1333,7 @@ class DXDevice : public offloadtest::Device { // descriptor set layout. This is to make it easier to write tests that // don't need complicated root signatures. for (uint32_t Idx = 0u; Idx < P.Sets.size(); ++Idx) { - IS.CmdList->SetComputeRootDescriptorTable(Idx, Handle); + IS.CB->CmdList->SetComputeRootDescriptorTable(Idx, Handle); Handle.Offset(P.Sets[Idx].Resources.size(), Inc); } } @@ -1321,7 +1341,7 @@ class DXDevice : public offloadtest::Device { const llvm::ArrayRef DispatchSize = llvm::ArrayRef(P.Shaders[0].DispatchSize); - IS.CmdList->Dispatch(DispatchSize[0], DispatchSize[1], DispatchSize[2]); + IS.CB->CmdList->Dispatch(DispatchSize[0], DispatchSize[1], DispatchSize[2]); auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { @@ -1338,7 +1358,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RS.Readback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); addReadbackEndBarrier(IS, RS.Buffer); } return; @@ -1347,7 +1367,7 @@ class DXDevice : public offloadtest::Device { if (RS.Readback == nullptr) continue; addReadbackBeginBarrier(IS, RS.Buffer); - IS.CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); + IS.CB->CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); addReadbackEndBarrier(IS, RS.Buffer); } }; @@ -1527,8 +1547,8 @@ class DXDevice : public offloadtest::Device { VBView.SizeInBytes = static_cast(VBSize); VBView.StrideInBytes = P.Bindings.getVertexStride(); - IS.CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - IS.CmdList->IASetVertexBuffers(0, 1, &VBView); + IS.CB->CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + IS.CB->CmdList->IASetVertexBuffers(0, 1, &VBView); return llvm::Error::success(); } @@ -1606,16 +1626,16 @@ class DXDevice : public offloadtest::Device { IS.RTVHeap->GetCPUDescriptorHandleForHeapStart(); Device->CreateRenderTargetView(IS.RT.Get(), nullptr, RTVHandle); - IS.CmdList->SetGraphicsRootSignature(IS.RootSig.Get()); + IS.CB->CmdList->SetGraphicsRootSignature(IS.RootSig.Get()); if (IS.DescHeap) { ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()}; - IS.CmdList->SetDescriptorHeaps(1, Heaps); - IS.CmdList->SetGraphicsRootDescriptorTable( + IS.CB->CmdList->SetDescriptorHeaps(1, Heaps); + IS.CB->CmdList->SetGraphicsRootDescriptorTable( 0, IS.DescHeap->GetGPUDescriptorHandleForHeapStart()); } - IS.CmdList->SetPipelineState(IS.PSO.Get()); + IS.CB->CmdList->SetPipelineState(IS.PSO.Get()); - IS.CmdList->OMSetRenderTargets(1, &RTVHandle, false, nullptr); + IS.CB->CmdList->OMSetRenderTargets(1, &RTVHandle, false, nullptr); D3D12_VIEWPORT VP = {}; VP.Width = @@ -1626,19 +1646,19 @@ class DXDevice : public offloadtest::Device { VP.MaxDepth = 1.0f; VP.TopLeftX = 0.0f; VP.TopLeftY = 0.0f; - IS.CmdList->RSSetViewports(1, &VP); + IS.CB->CmdList->RSSetViewports(1, &VP); const D3D12_RECT Scissor = {0, 0, static_cast(VP.Width), static_cast(VP.Height)}; - IS.CmdList->RSSetScissorRects(1, &Scissor); + IS.CB->CmdList->RSSetScissorRects(1, &Scissor); - IS.CmdList->DrawInstanced(P.Bindings.getVertexCount(), 1, 0, 0); + IS.CB->CmdList->DrawInstanced(P.Bindings.getVertexCount(), 1, 0, 0); // Transition the render target to copy source and copy to the readback // buffer. const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( IS.RT.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ @@ -1649,7 +1669,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { @@ -1666,7 +1686,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RS.Readback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); addReadbackEndBarrier(IS, RS.Buffer); } return; @@ -1675,7 +1695,7 @@ class DXDevice : public offloadtest::Device { if (RS.Readback == nullptr) continue; addReadbackBeginBarrier(IS, RS.Buffer); - IS.CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); + IS.CB->CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); addReadbackEndBarrier(IS, RS.Buffer); } }; @@ -1726,9 +1746,11 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Descriptor heap created.\n"; - if (auto Err = createCommandStructures(State)) - return Err; - llvm::outs() << "Command structures created.\n"; + auto CBOrErr = DXCommandBuffer::create(Device); + if (!CBOrErr) + return CBOrErr.takeError(); + State.CB = std::move(*CBOrErr); + llvm::outs() << "Command buffer created.\n"; auto FenceOrErr = createFence("Fence"); if (!FenceOrErr) diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index dad8a6d04..dd1d5feb6 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -131,6 +131,28 @@ class MTLBuffer : public offloadtest::Buffer { } }; +class MTLCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::Metal; + + MTL::CommandBuffer *CmdBuffer = nullptr; + + static llvm::Expected> + create(MTL::CommandQueue *Queue) { + auto CB = std::unique_ptr(new MTLCommandBuffer()); + CB->CmdBuffer = Queue->commandBuffer(); + if (!CB->CmdBuffer) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to create Metal command buffer."); + return CB; + } + + ~MTLCommandBuffer() override = default; + +private: + MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {} +}; + class MTLDevice : public offloadtest::Device { Capabilities Caps; MTL::Device *Device; @@ -160,7 +182,7 @@ class MTLDevice : public offloadtest::Device { llvm::SmallVector Textures; llvm::SmallVector Buffers; MTL::Texture *FrameBufferTexture = nullptr; - MTL::CommandBuffer *CmdBuffer = nullptr; + std::unique_ptr CB; std::unique_ptr Fence; }; @@ -400,10 +422,8 @@ class MTLDevice : public offloadtest::Device { } llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) { - IS.CmdBuffer = GraphicsQueue.Queue->commandBuffer(); - MTL::ComputeCommandEncoder *CmdEncoder = - IS.CmdBuffer->computeCommandEncoder(); + IS.CB->CmdBuffer->computeCommandEncoder(); CmdEncoder->setComputePipelineState(IS.ComputePipeline); CmdEncoder->setBuffer(IS.ArgBuffer, 0, 2); @@ -468,8 +488,6 @@ class MTLDevice : public offloadtest::Device { } llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { - IS.CmdBuffer = GraphicsQueue.Queue->commandBuffer(); - MTL::RenderPassDescriptor *Desc = MTL::RenderPassDescriptor::alloc()->init(); @@ -499,7 +517,7 @@ class MTLDevice : public offloadtest::Device { Desc->colorAttachments()->setObject(CADesc, 0); MTL::RenderCommandEncoder *CmdEncoder = - IS.CmdBuffer->renderCommandEncoder(Desc); + IS.CB->CmdBuffer->renderCommandEncoder(Desc); CmdEncoder->setRenderPipelineState(IS.RenderPipeline); // Explicitly set viewport to texture dimensions. @@ -525,14 +543,14 @@ class MTLDevice : public offloadtest::Device { const uint64_t CurrentCounter = FenceCounter + 1; auto *F = static_cast(IS.Fence.get()); - IS.CmdBuffer->encodeSignalEvent(F->Event, CurrentCounter); - IS.CmdBuffer->commit(); + IS.CB->CmdBuffer->encodeSignalEvent(F->Event, CurrentCounter); + IS.CB->CmdBuffer->commit(); if (auto Err = IS.Fence->waitForCompletion(CurrentCounter)) return Err; // Check and surface any errors that occurred during execution. - NS::Error *CBErr = IS.CmdBuffer->error(); + NS::Error *CBErr = IS.CB->CmdBuffer->error(); if (CBErr) return toError(CBErr); @@ -632,9 +650,19 @@ class MTLDevice : public offloadtest::Device { return std::make_shared(Buf, Name, Desc, SizeInBytes); } + llvm::Expected> + createCommandBuffer() override { + return MTLCommandBuffer::create(GraphicsQueue.Queue); + } + llvm::Error executeProgram(Pipeline &P) override { InvocationState IS; + auto CBOrErr = MTLCommandBuffer::create(GraphicsQueue.Queue); + if (!CBOrErr) + return CBOrErr.takeError(); + IS.CB = std::move(*CBOrErr); + auto FenceOrErr = createFence("Fence"); if (!FenceOrErr) return FenceOrErr.takeError(); diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 81df567bb..99fcab56a 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -451,6 +451,56 @@ class VulkanQueue : public offloadtest::Queue { : Queue(Q), QueueFamilyIdx(QueueFamilyIdx) {} }; +class VulkanCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::Vulkan; + + VkDevice Device = VK_NULL_HANDLE; + // Owned per command buffer so that recording, submission, and lifetime + // management of each command buffer are independently safe without external + // synchronization. + VkCommandPool CmdPool = VK_NULL_HANDLE; + VkCommandBuffer CmdBuffer = VK_NULL_HANDLE; + + static llvm::Expected> + create(VkDevice Device, uint32_t QueueFamilyIdx) { + auto CB = std::unique_ptr(new VulkanCommandBuffer()); + CB->Device = Device; + + VkCommandPoolCreateInfo CmdPoolInfo = {}; + CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + CmdPoolInfo.queueFamilyIndex = QueueFamilyIdx; + CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + if (vkCreateCommandPool(Device, &CmdPoolInfo, nullptr, &CB->CmdPool)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not create command pool."); + + VkCommandBufferAllocateInfo CBufAllocInfo = {}; + CBufAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + CBufAllocInfo.commandPool = CB->CmdPool; + CBufAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + CBufAllocInfo.commandBufferCount = 1; + if (vkAllocateCommandBuffers(Device, &CBufAllocInfo, &CB->CmdBuffer)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not create command buffer."); + + VkCommandBufferBeginInfo BufferInfo = {}; + BufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + if (vkBeginCommandBuffer(CB->CmdBuffer, &BufferInfo)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not begin command buffer."); + return CB; + } + + ~VulkanCommandBuffer() override { + if (CmdPool != VK_NULL_HANDLE) + vkDestroyCommandPool(Device, CmdPool, nullptr); + } + +private: + VulkanCommandBuffer() : CommandBuffer(GPUAPI::Vulkan) {} +}; + class VulkanDevice : public offloadtest::Device { private: std::shared_ptr Instance; @@ -532,8 +582,7 @@ class VulkanDevice : public offloadtest::Device { }; struct InvocationState { - VkCommandPool CmdPool = VK_NULL_HANDLE; - VkCommandBuffer CmdBuffer = VK_NULL_HANDLE; + std::unique_ptr CB; VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; VkDescriptorPool Pool = VK_NULL_HANDLE; VkPipelineCache PipelineCache = VK_NULL_HANDLE; @@ -850,33 +899,9 @@ class VulkanDevice : public offloadtest::Device { } public: - llvm::Error createDevice(InvocationState &IS) { - VkCommandPoolCreateInfo CmdPoolInfo = {}; - CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - CmdPoolInfo.queueFamilyIndex = GraphicsQueue.QueueFamilyIdx; - CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - - if (vkCreateCommandPool(Device, &CmdPoolInfo, nullptr, &IS.CmdPool)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not create command pool."); - return llvm::Error::success(); - } - - llvm::Error createCommandBuffer(InvocationState &IS) { - VkCommandBufferAllocateInfo CBufAllocInfo = {}; - CBufAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - CBufAllocInfo.commandPool = IS.CmdPool; - CBufAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - CBufAllocInfo.commandBufferCount = 1; - if (vkAllocateCommandBuffers(Device, &CBufAllocInfo, &IS.CmdBuffer)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not create command buffer."); - VkCommandBufferBeginInfo BufferInfo = {}; - BufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - if (vkBeginCommandBuffer(IS.CmdBuffer, &BufferInfo)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not begin command buffer."); - return llvm::Error::success(); + llvm::Expected> + createCommandBuffer() override { + return VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); } llvm::Expected createBuffer(VkBufferUsageFlags Usage, @@ -1062,8 +1087,8 @@ class VulkanDevice : public offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = R.size(); - vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExHostBuf->Buffer, + ExDeviceBuf->Buffer, 1, &Copy); Bundle.ResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf); } } @@ -1085,8 +1110,8 @@ class VulkanDevice : public offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = sizeof(uint32_t); - vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExHostBuf->Buffer, + ExDeviceBuf->Buffer, 1, &Copy); Bundle.CounterResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf); } } @@ -1205,8 +1230,8 @@ class VulkanDevice : public offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = VertexBuffer.size(); - vkCmdCopyBuffer(IS.CmdBuffer, ExVHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExVHostBuf->Buffer, ExDeviceBuf->Buffer, + 1, &Copy); IS.VertexBuffer = ResourceRef(*ExVHostBuf, *ExDeviceBuf); } @@ -1218,7 +1243,7 @@ class VulkanDevice : public offloadtest::Device { static uint64_t FenceCounter = 0; const uint64_t CurrentCounter = FenceCounter + 1; - if (vkEndCommandBuffer(IS.CmdBuffer)) + if (vkEndCommandBuffer(IS.CB->CmdBuffer)) return llvm::createStringError(std::errc::device_or_resource_busy, "Could not end command buffer."); @@ -1233,7 +1258,7 @@ class VulkanDevice : public offloadtest::Device { SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; SubmitInfo.pNext = &TimelineSubmitInfo; SubmitInfo.commandBufferCount = 1; - SubmitInfo.pCommandBuffers = &IS.CmdBuffer; + SubmitInfo.pCommandBuffers = &IS.CB->CmdBuffer; SubmitInfo.signalSemaphoreCount = 1; SubmitInfo.pSignalSemaphores = &F->Semaphore; @@ -1245,7 +1270,7 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = IS.Fence->waitForCompletion(CurrentCounter)) return Err; - vkFreeCommandBuffers(Device, IS.CmdPool, 1, &IS.CmdBuffer); + vkFreeCommandBuffers(Device, IS.CB->CmdPool, 1, &IS.CB->CmdBuffer); FenceCounter = CurrentCounter; return llvm::Error::success(); @@ -2007,11 +2032,11 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &ImageBarrier); - vkCmdCopyBufferToImage(IS.CmdBuffer, ResRef.Host.Buffer, + vkCmdCopyBufferToImage(IS.CB->CmdBuffer, ResRef.Host.Buffer, ResRef.Image.Image, VK_IMAGE_LAYOUT_GENERAL, Regions.size(), Regions.data()); } @@ -2025,7 +2050,7 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &ImageBarrier); } @@ -2040,7 +2065,7 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } @@ -2071,7 +2096,8 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &ImageBarrier); } @@ -2100,7 +2126,7 @@ class VulkanDevice : public offloadtest::Device { } for (auto &ResRef : R.ResourceRefs) - vkCmdCopyImageToBuffer(IS.CmdBuffer, ResRef.Image.Image, + vkCmdCopyImageToBuffer(IS.CB->CmdBuffer, ResRef.Image.Image, VK_IMAGE_LAYOUT_GENERAL, ResRef.Host.Buffer, Regions.size(), Regions.data()); @@ -2113,7 +2139,7 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } @@ -2129,21 +2155,22 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } VkBufferCopy CopyRegion = {}; CopyRegion.size = R.size(); for (auto &ResRef : R.ResourceRefs) - vkCmdCopyBuffer(IS.CmdBuffer, ResRef.Device.Buffer, ResRef.Host.Buffer, 1, - &CopyRegion); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ResRef.Device.Buffer, + ResRef.Host.Buffer, 1, &CopyRegion); VkBufferCopy CounterCopyRegion = {}; CounterCopyRegion.size = sizeof(uint32_t); for (auto &ResRef : R.CounterResourceRefs) - vkCmdCopyBuffer(IS.CmdBuffer, ResRef.Device.Buffer, ResRef.Host.Buffer, 1, - &CounterCopyRegion); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ResRef.Device.Buffer, + ResRef.Host.Buffer, 1, &CounterCopyRegion); Barrier.size = VK_WHOLE_SIZE; Barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -2152,13 +2179,13 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } for (auto &ResRef : R.CounterResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } @@ -2184,7 +2211,7 @@ class VulkanDevice : public offloadtest::Device { RenderPassBeginInfo.clearValueCount = 2; RenderPassBeginInfo.pClearValues = ClearValues; - vkCmdBeginRenderPass(IS.CmdBuffer, &RenderPassBeginInfo, + vkCmdBeginRenderPass(IS.CB->CmdBuffer, &RenderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); VkViewport Viewport = {}; @@ -2196,28 +2223,28 @@ class VulkanDevice : public offloadtest::Device { static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); Viewport.minDepth = 0.0f; Viewport.maxDepth = 1.0f; - vkCmdSetViewport(IS.CmdBuffer, 0, 1, &Viewport); + vkCmdSetViewport(IS.CB->CmdBuffer, 0, 1, &Viewport); VkRect2D Scissor = {}; Scissor.offset = {0, 0}; Scissor.extent.width = P.Bindings.RTargetBufferPtr->OutputProps.Width; Scissor.extent.height = P.Bindings.RTargetBufferPtr->OutputProps.Height; - vkCmdSetScissor(IS.CmdBuffer, 0, 1, &Scissor); + vkCmdSetScissor(IS.CB->CmdBuffer, 0, 1, &Scissor); } const VkPipelineBindPoint BindPoint = P.isGraphics() ? VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE; - vkCmdBindPipeline(IS.CmdBuffer, BindPoint, IS.Pipeline); + vkCmdBindPipeline(IS.CB->CmdBuffer, BindPoint, IS.Pipeline); if (IS.DescriptorSets.size() > 0) - vkCmdBindDescriptorSets(IS.CmdBuffer, BindPoint, IS.PipelineLayout, 0, + vkCmdBindDescriptorSets(IS.CB->CmdBuffer, BindPoint, IS.PipelineLayout, 0, IS.DescriptorSets.size(), IS.DescriptorSets.data(), 0, 0); for (const auto &PCB : P.PushConstants) { llvm::SmallVector Data; PCB.getContent(Data); - vkCmdPushConstants(IS.CmdBuffer, IS.PipelineLayout, + vkCmdPushConstants(IS.CB->CmdBuffer, IS.PipelineLayout, getShaderStageFlag(PCB.Stage), 0, Data.size(), Data.data()); } @@ -2225,19 +2252,19 @@ class VulkanDevice : public offloadtest::Device { if (P.isCompute()) { const llvm::ArrayRef DispatchSize = llvm::ArrayRef(P.Shaders[0].DispatchSize); - vkCmdDispatch(IS.CmdBuffer, DispatchSize[0], DispatchSize[1], + vkCmdDispatch(IS.CB->CmdBuffer, DispatchSize[0], DispatchSize[1], DispatchSize[2]); llvm::outs() << "Dispatched compute shader: { " << DispatchSize[0] << ", " << DispatchSize[1] << ", " << DispatchSize[2] << " }\n"; } else { VkDeviceSize Offsets[1]{0}; assert(IS.VertexBuffer.has_value()); - vkCmdBindVertexBuffers(IS.CmdBuffer, 0, 1, + vkCmdBindVertexBuffers(IS.CB->CmdBuffer, 0, 1, &IS.VertexBuffer->Device.Buffer, Offsets); // instanceCount must be >=1 to draw; previously was 0 which draws nothing - vkCmdDraw(IS.CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0); + vkCmdDraw(IS.CB->CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0); llvm::outs() << "Drew " << P.Bindings.getVertexCount() << " vertices.\n"; - vkCmdEndRenderPass(IS.CmdBuffer); + vkCmdEndRenderPass(IS.CB->CmdBuffer); copyResourceDataToHost(IS, IS.FrameBufferResource); } @@ -2381,9 +2408,6 @@ class VulkanDevice : public offloadtest::Device { if (IS.Pool) vkDestroyDescriptorPool(Device, IS.Pool, nullptr); - - if (IS.CmdPool) - vkDestroyCommandPool(Device, IS.CmdPool, nullptr); } llvm::Error executeProgram(Pipeline &P) override { @@ -2393,20 +2417,20 @@ class VulkanDevice : public offloadtest::Device { llvm::outs() << "Cleanup complete.\n"; }); - if (auto Err = createDevice(State)) - return Err; + auto CBOrErr = + VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); + if (!CBOrErr) + return CBOrErr.takeError(); + State.CB = std::move(*CBOrErr); + llvm::outs() << "Command buffer created.\n"; auto FenceOrErr = createFence("Fence"); if (!FenceOrErr) return FenceOrErr.takeError(); State.Fence = std::move(*FenceOrErr); - - llvm::outs() << "Physical device created.\n"; if (auto Err = createShaderModules(P, State)) return Err; llvm::outs() << "Shader module created.\n"; - if (auto Err = createCommandBuffer(State)) - return Err; llvm::outs() << "Copy command buffer created.\n"; if (auto Err = createResources(P, State)) return Err; @@ -2422,8 +2446,11 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = executeCommandBuffer(State)) return Err; llvm::outs() << "Executed copy command buffer.\n"; - if (auto Err = createCommandBuffer(State)) - return Err; + auto DispatchCBOrErr = + VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); + if (!DispatchCBOrErr) + return DispatchCBOrErr.takeError(); + State.CB = std::move(*DispatchCBOrErr); llvm::outs() << "Execute command buffer created.\n"; if (auto Err = createDescriptorPool(P, State)) return Err;