diff --git a/include/API/CommandBuffer.h b/include/API/CommandBuffer.h new file mode 100644 index 000000000..5a49f6a0e --- /dev/null +++ b/include/API/CommandBuffer.h @@ -0,0 +1,44 @@ +//===- CommandBuffer.h - Offload Command Buffer API -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Declares the backend-agnostic CommandBuffer base class. Each instance is +// tagged with its GPUAPI so as() can check downcasts to a backend type. +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_COMMANDBUFFER_H +#define OFFLOADTEST_API_COMMANDBUFFER_H + +#include "API/API.h" + +#include + +namespace offloadtest { + +class CommandBuffer { + GPUAPI API; + +public: + explicit CommandBuffer(GPUAPI API) : API(API) {} + virtual ~CommandBuffer() = default; + CommandBuffer(const CommandBuffer &) = delete; + CommandBuffer &operator=(const CommandBuffer &) = delete; + + GPUAPI getAPI() const { return API; } + + template T &as() { + assert(API == T::BackendAPI && "CommandBuffer backend mismatch"); + return static_cast(*this); + } + template const T &as() const { + assert(API == T::BackendAPI && "CommandBuffer backend mismatch"); + return static_cast(*this); + } +}; + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_COMMANDBUFFER_H diff --git a/include/API/Device.h b/include/API/Device.h index f99271233..b927b9f3c 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -16,8 +16,10 @@ #include "API/API.h" #include "API/Capabilities.h" +#include "API/CommandBuffer.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Error.h" #include #include @@ -60,6 +62,19 @@ class Queue { public: virtual ~Queue() = 0; + /// Submit command buffers for execution and block until completion. 
+ // TODO: Return a Fence instead of blocking, once the Fence abstraction + // from PR #1007 is available. + virtual llvm::Error + submit(llvm::SmallVector> CBs) = 0; + + /// Convenience overload for submitting a single command buffer. + llvm::Error submit(std::unique_ptr CB) { + llvm::SmallVector> CBs; + CBs.push_back(std::move(CB)); + return submit(std::move(CBs)); + } + protected: Queue() = default; }; @@ -82,6 +97,12 @@ class Device { size_t SizeInBytes) = 0; virtual void printExtra(llvm::raw_ostream &OS) {} + virtual llvm::Expected> createCommandBuffer() { + return llvm::createStringError( + std::errc::not_supported, + "createCommandBuffer not implemented for this backend"); + } + virtual ~Device() = 0; llvm::StringRef getDescription() const { return Description; } diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 2eec8b777..f1d64972d 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -295,24 +295,136 @@ class DXBuffer : public offloadtest::Buffer { class DXQueue : public offloadtest::Queue { public: + using Queue::submit; + ComPtr Queue; DXQueue(ComPtr Queue) : Queue(Queue) {} - virtual ~DXQueue() {} + ~DXQueue() override {} static llvm::Expected createGraphicsQueue(ComPtr Device) { const D3D12_COMMAND_QUEUE_DESC Desc = {D3D12_COMMAND_LIST_TYPE_DIRECT, 0, D3D12_COMMAND_QUEUE_FLAG_NONE, 0}; - ComPtr Queue; - if (auto Err = - HR::toError(Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&Queue)), - "Failed to create command queue.")) + ComPtr CmdQueue; + if (auto Err = HR::toError( + Device->CreateCommandQueue(&Desc, IID_PPV_ARGS(&CmdQueue)), + "Failed to create command queue.")) + return Err; + return DXQueue(CmdQueue); + } + + llvm::Error + submit(llvm::SmallVector> CBs) + override; +}; + +class DXCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::DirectX; + + // Owned per command buffer so that recording, submission, and lifetime + // management of each command buffer are 
independently safe without external + // synchronization. + ComPtr Allocator; + ComPtr CmdList; + ComPtr Fence; +#ifdef _WIN32 + HANDLE Event = nullptr; +#else // WSL + int Event = -1; +#endif + + static llvm::Expected> + create(ComPtr Device) { + auto CB = std::unique_ptr(new DXCommandBuffer()); + if (auto Err = HR::toError( + Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(&CB->Allocator)), + "Failed to create command allocator.")) + return Err; + if (auto Err = HR::toError( + Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, + CB->Allocator.Get(), nullptr, + IID_PPV_ARGS(&CB->CmdList)), + "Failed to create command list.")) + return Err; + if (auto Err = HR::toError(Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, + IID_PPV_ARGS(&CB->Fence)), + "Failed to create fence.")) return Err; - return DXQueue(Queue); +#ifdef _WIN32 + CB->Event = CreateEventA(nullptr, false, false, nullptr); + if (!CB->Event) +#else // WSL + CB->Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (CB->Event == -1) +#endif + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to create event."); + return CB; + } + + ~DXCommandBuffer() override { +#ifdef _WIN32 + if (Event) + CloseHandle(Event); +#else // WSL + if (Event != -1) + close(Event); +#endif } + +private: + DXCommandBuffer() : CommandBuffer(GPUAPI::DirectX) {} }; +llvm::Error DXQueue::submit( + llvm::SmallVector> CBs) { + // This is a hack but it works since this is all single threaded code. 
+ static uint64_t FenceCounter = 0; + + for (auto &CB : CBs) { + auto &DCB = CB->as(); + if (auto Err = + HR::toError(DCB.CmdList->Close(), "Failed to close command list.")) + return Err; + + ID3D12CommandList *const CmdLists[] = {DCB.CmdList.Get()}; + Queue->ExecuteCommandLists(1, CmdLists); + + const uint64_t CurrentCounter = FenceCounter + 1; + if (auto Err = HR::toError(Queue->Signal(DCB.Fence.Get(), CurrentCounter), + "Failed to add signal.")) + return Err; + + if (DCB.Fence->GetCompletedValue() < CurrentCounter) { +#ifdef _WIN32 + HANDLE Event = DCB.Event; +#else // WSL + HANDLE Event = reinterpret_cast(DCB.Event); +#endif + if (auto Err = HR::toError( + DCB.Fence->SetEventOnCompletion(CurrentCounter, Event), + "Failed to register end event.")) + return Err; + +#ifdef _WIN32 + WaitForSingleObject(DCB.Event, INFINITE); +#else // WSL + pollfd PollEvent; + PollEvent.fd = DCB.Event; + PollEvent.events = POLLIN; + PollEvent.revents = 0; + if (poll(&PollEvent, 1, -1) == -1) + return llvm::createStringError( + std::error_code(errno, std::system_category()), strerror(errno)); +#endif + } + FenceCounter = CurrentCounter; + } + return llvm::Error::success(); +} class DXDevice : public offloadtest::Device { private: ComPtr Adapter; @@ -344,14 +456,7 @@ class DXDevice : public offloadtest::Device { ComPtr RootSig; ComPtr DescHeap; ComPtr PSO; - ComPtr Allocator; - ComPtr CmdList; - ComPtr Fence; -#ifdef _WIN32 - HANDLE Event; -#else // WSL - int Event; -#endif + std::unique_ptr CB; // Resources for graphics pipelines. 
ComPtr RT; @@ -607,19 +712,9 @@ class DXDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error createCommandStructures(InvocationState &IS) { - if (auto Err = HR::toError( - Device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, - IID_PPV_ARGS(&IS.Allocator)), - "Failed to create command allocator.")) - return Err; - if (auto Err = HR::toError( - Device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, - IS.Allocator.Get(), nullptr, - IID_PPV_ARGS(&IS.CmdList)), - "Failed to create command list.")) - return Err; - return llvm::Error::success(); + llvm::Expected> + createCommandBuffer() override { + return DXCommandBuffer::create(Device); } void addResourceUploadCommands(Resource &R, InvocationState &IS, @@ -636,10 +731,10 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(Destination.Get(), 0); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(Source.Get(), Footprint); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); } else - IS.CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, - R.size()); + IS.CB->CmdList->CopyBufferRegion(Destination.Get(), 0, Source.Get(), 0, + R.size()); addUploadEndBarrier(IS, Destination, R.isReadWrite()); } @@ -1106,7 +1201,7 @@ class DXDevice : public offloadtest::Device { {D3D12_RESOURCE_TRANSITION_BARRIER{ R.Get(), D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST}}}; - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addUploadEndBarrier(InvocationState &IS, ComPtr R, @@ -1119,66 +1214,51 @@ class DXDevice : public offloadtest::Device { D3D12_RESOURCE_STATE_COPY_DEST, IsUAV ? 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS : D3D12_RESOURCE_STATE_GENERIC_READ}}}; - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addReadbackBeginBarrier(InvocationState &IS, ComPtr R) { const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } void addReadbackEndBarrier(InvocationState &IS, ComPtr R) { const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( R.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - IS.CmdList->ResourceBarrier(1, &Barrier); - } - - llvm::Error createEvent(InvocationState &IS) { - if (auto Err = HR::toError(Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, - IID_PPV_ARGS(&IS.Fence)), - "Failed to create fence.")) - return Err; -#ifdef _WIN32 - IS.Event = CreateEventA(nullptr, false, false, nullptr); - if (!IS.Event) -#else // WSL - IS.Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); - if (IS.Event == -1) -#endif - return llvm::createStringError(std::errc::device_or_resource_busy, - "Failed to create event."); - return llvm::Error::success(); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); } + // waitForSignal is used for tile mapping synchronization, not command buffer + // submission. TODO: Replace with a proper fence abstraction. llvm::Error waitForSignal(InvocationState &IS) { - // This is a hack but it works since this is all single threaded code. + // Reuse the command buffer's fence for a quick queue-level signal/wait. 
static uint64_t FenceCounter = 0; const uint64_t CurrentCounter = FenceCounter + 1; if (auto Err = HR::toError( - GraphicsQueue.Queue->Signal(IS.Fence.Get(), CurrentCounter), + GraphicsQueue.Queue->Signal(IS.CB->Fence.Get(), CurrentCounter), "Failed to add signal.")) return Err; - if (IS.Fence->GetCompletedValue() < CurrentCounter) { + if (IS.CB->Fence->GetCompletedValue() < CurrentCounter) { #ifdef _WIN32 - HANDLE Event = IS.Event; + HANDLE Event = IS.CB->Event; #else // WSL - HANDLE Event = reinterpret_cast(IS.Event); + HANDLE Event = reinterpret_cast(IS.CB->Event); #endif - if (auto Err = - HR::toError(IS.Fence->SetEventOnCompletion(CurrentCounter, Event), - "Failed to register end event.")) + if (auto Err = HR::toError( + IS.CB->Fence->SetEventOnCompletion(CurrentCounter, Event), + "Failed to register end event.")) return Err; #ifdef _WIN32 - WaitForSingleObject(IS.Event, INFINITE); + WaitForSingleObject(IS.CB->Event, INFINITE); #else // WSL pollfd PollEvent; - PollEvent.fd = IS.Event; + PollEvent.fd = IS.CB->Event; PollEvent.events = POLLIN; PollEvent.revents = 0; if (poll(&PollEvent, 1, -1) == -1) @@ -1191,25 +1271,18 @@ class DXDevice : public offloadtest::Device { } llvm::Error executeCommandList(InvocationState &IS) { - if (auto Err = - HR::toError(IS.CmdList->Close(), "Failed to close command list.")) - return Err; - - ID3D12CommandList *const CmdLists[] = {IS.CmdList.Get()}; - GraphicsQueue.Queue->ExecuteCommandLists(1, CmdLists); - - return waitForSignal(IS); + return GraphicsQueue.submit(std::move(IS.CB)); } llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) { CD3DX12_GPU_DESCRIPTOR_HANDLE Handle; if (IS.DescHeap) { ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()}; - IS.CmdList->SetDescriptorHeaps(1, Heaps); + IS.CB->CmdList->SetDescriptorHeaps(1, Heaps); Handle = IS.DescHeap->GetGPUDescriptorHandleForHeapStart(); } - IS.CmdList->SetComputeRootSignature(IS.RootSig.Get()); - IS.CmdList->SetPipelineState(IS.PSO.Get()); + 
IS.CB->CmdList->SetComputeRootSignature(IS.RootSig.Get()); + IS.CB->CmdList->SetPipelineState(IS.PSO.Get()); const uint32_t Inc = Device->GetDescriptorHandleIncrementSize( D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); @@ -1229,14 +1302,15 @@ class DXDevice : public offloadtest::Device { "Root constant cannot refer to resource arrays."); const uint32_t NumValues = Constant.BufferPtr->size() / sizeof(uint32_t); - IS.CmdList->SetComputeRoot32BitConstants( + IS.CB->CmdList->SetComputeRoot32BitConstants( RootParamIndex++, NumValues, Constant.BufferPtr->Data.back().get(), ConstantOffset); ConstantOffset += NumValues; break; } case dx::RootParamKind::DescriptorTable: - IS.CmdList->SetComputeRootDescriptorTable(RootParamIndex++, Handle); + IS.CB->CmdList->SetComputeRootDescriptorTable(RootParamIndex++, + Handle); Handle.Offset(P.Sets[DescriptorTableIndex++].Resources.size(), Inc); break; case dx::RootParamKind::RootDescriptor: @@ -1247,17 +1321,17 @@ class DXDevice : public offloadtest::Device { "Root descriptor cannot refer to resource arrays."); switch (getDXKind(RootDescIt->first->Kind)) { case SRV: - IS.CmdList->SetComputeRootShaderResourceView( + IS.CB->CmdList->SetComputeRootShaderResourceView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; case UAV: - IS.CmdList->SetComputeRootUnorderedAccessView( + IS.CB->CmdList->SetComputeRootUnorderedAccessView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; case CBV: - IS.CmdList->SetComputeRootConstantBufferView( + IS.CB->CmdList->SetComputeRootConstantBufferView( RootParamIndex++, RootDescIt->second.back().Buffer->GetGPUVirtualAddress()); break; @@ -1273,7 +1347,7 @@ class DXDevice : public offloadtest::Device { // descriptor set layout. This is to make it easier to write tests that // don't need complicated root signatures. 
for (uint32_t Idx = 0u; Idx < P.Sets.size(); ++Idx) { - IS.CmdList->SetComputeRootDescriptorTable(Idx, Handle); + IS.CB->CmdList->SetComputeRootDescriptorTable(Idx, Handle); Handle.Offset(P.Sets[Idx].Resources.size(), Inc); } } @@ -1281,7 +1355,7 @@ class DXDevice : public offloadtest::Device { const llvm::ArrayRef DispatchSize = llvm::ArrayRef(P.Shaders[0].DispatchSize); - IS.CmdList->Dispatch(DispatchSize[0], DispatchSize[1], DispatchSize[2]); + IS.CB->CmdList->Dispatch(DispatchSize[0], DispatchSize[1], DispatchSize[2]); auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { @@ -1298,7 +1372,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RS.Readback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); addReadbackEndBarrier(IS, RS.Buffer); } return; @@ -1307,7 +1381,7 @@ class DXDevice : public offloadtest::Device { if (RS.Readback == nullptr) continue; addReadbackBeginBarrier(IS, RS.Buffer); - IS.CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); + IS.CB->CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); addReadbackEndBarrier(IS, RS.Buffer); } }; @@ -1487,8 +1561,8 @@ class DXDevice : public offloadtest::Device { VBView.SizeInBytes = static_cast(VBSize); VBView.StrideInBytes = P.Bindings.getVertexStride(); - IS.CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - IS.CmdList->IASetVertexBuffers(0, 1, &VBView); + IS.CB->CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + IS.CB->CmdList->IASetVertexBuffers(0, 1, &VBView); return llvm::Error::success(); } @@ -1566,16 +1640,16 @@ class DXDevice : public offloadtest::Device { IS.RTVHeap->GetCPUDescriptorHandleForHeapStart(); Device->CreateRenderTargetView(IS.RT.Get(), nullptr, RTVHandle); - 
IS.CmdList->SetGraphicsRootSignature(IS.RootSig.Get()); + IS.CB->CmdList->SetGraphicsRootSignature(IS.RootSig.Get()); if (IS.DescHeap) { ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()}; - IS.CmdList->SetDescriptorHeaps(1, Heaps); - IS.CmdList->SetGraphicsRootDescriptorTable( + IS.CB->CmdList->SetDescriptorHeaps(1, Heaps); + IS.CB->CmdList->SetGraphicsRootDescriptorTable( 0, IS.DescHeap->GetGPUDescriptorHandleForHeapStart()); } - IS.CmdList->SetPipelineState(IS.PSO.Get()); + IS.CB->CmdList->SetPipelineState(IS.PSO.Get()); - IS.CmdList->OMSetRenderTargets(1, &RTVHandle, false, nullptr); + IS.CB->CmdList->OMSetRenderTargets(1, &RTVHandle, false, nullptr); D3D12_VIEWPORT VP = {}; VP.Width = @@ -1586,19 +1660,19 @@ class DXDevice : public offloadtest::Device { VP.MaxDepth = 1.0f; VP.TopLeftX = 0.0f; VP.TopLeftY = 0.0f; - IS.CmdList->RSSetViewports(1, &VP); + IS.CB->CmdList->RSSetViewports(1, &VP); const D3D12_RECT Scissor = {0, 0, static_cast(VP.Width), static_cast(VP.Height)}; - IS.CmdList->RSSetScissorRects(1, &Scissor); + IS.CB->CmdList->RSSetScissorRects(1, &Scissor); - IS.CmdList->DrawInstanced(P.Bindings.getVertexCount(), 1, 0, 0); + IS.CB->CmdList->DrawInstanced(P.Bindings.getVertexCount(), 1, 0, 0); // Transition the render target to copy source and copy to the readback // buffer. 
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( IS.RT.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); - IS.CmdList->ResourceBarrier(1, &Barrier); + IS.CB->CmdList->ResourceBarrier(1, &Barrier); const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; const D3D12_PLACED_SUBRESOURCE_FOOTPRINT Footprint{ @@ -1609,7 +1683,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); auto CopyBackResource = [&IS, this](ResourcePair &R) { if (R.first->isTexture()) { @@ -1626,7 +1700,7 @@ class DXDevice : public offloadtest::Device { const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(RS.Readback.Get(), Footprint); const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(RS.Buffer.Get(), 0); - IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); + IS.CB->CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); addReadbackEndBarrier(IS, RS.Buffer); } return; @@ -1635,7 +1709,7 @@ class DXDevice : public offloadtest::Device { if (RS.Readback == nullptr) continue; addReadbackBeginBarrier(IS, RS.Buffer); - IS.CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); + IS.CB->CmdList->CopyResource(RS.Readback.Get(), RS.Buffer.Get()); addReadbackEndBarrier(IS, RS.Buffer); } }; @@ -1686,13 +1760,11 @@ class DXDevice : public offloadtest::Device { return Err; llvm::outs() << "Descriptor heap created.\n"; - if (auto Err = createCommandStructures(State)) - return Err; - llvm::outs() << "Command structures created.\n"; - - if (auto Err = createEvent(State)) - return Err; - llvm::outs() << "Event prepared.\n"; + auto CBOrErr = DXCommandBuffer::create(Device); + if (!CBOrErr) + return CBOrErr.takeError(); + State.CB = std::move(*CBOrErr); + llvm::outs() << "Command buffer 
created.\n"; if (auto Err = createBuffers(P, State)) return Err; diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 0c26f7c7a..ea096153e 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -75,12 +75,18 @@ static MTL::VertexFormat getMTLVertexFormat(DataFormat Format, int Channels) { namespace { class MTLQueue : public offloadtest::Queue { public: + using Queue::submit; + MTL::CommandQueue *Queue; MTLQueue(MTL::CommandQueue *Queue) : Queue(Queue) {} - ~MTLQueue() { + ~MTLQueue() override { if (Queue) Queue->release(); } + + llvm::Error + submit(llvm::SmallVector> CBs) + override; }; class MTLBuffer : public offloadtest::Buffer { @@ -100,6 +106,41 @@ class MTLBuffer : public offloadtest::Buffer { } }; +class MTLCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::Metal; + + MTL::CommandBuffer *CmdBuffer = nullptr; + + static llvm::Expected> + create(MTL::CommandQueue *Queue) { + auto CB = std::unique_ptr(new MTLCommandBuffer()); + CB->CmdBuffer = Queue->commandBuffer(); + if (!CB->CmdBuffer) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to create Metal command buffer."); + return CB; + } + + ~MTLCommandBuffer() override = default; + +private: + MTLCommandBuffer() : CommandBuffer(GPUAPI::Metal) {} +}; + +llvm::Error MTLQueue::submit( + llvm::SmallVector> CBs) { + for (auto &CB : CBs) { + auto &MCB = CB->as(); + MCB.CmdBuffer->commit(); + MCB.CmdBuffer->waitUntilCompleted(); + + NS::Error *Err = MCB.CmdBuffer->error(); + if (Err) + return toError(Err); + } + return llvm::Error::success(); +} class MTLDevice : public offloadtest::Device { Capabilities Caps; MTL::Device *Device; @@ -129,7 +170,7 @@ class MTLDevice : public offloadtest::Device { llvm::SmallVector Textures; llvm::SmallVector Buffers; MTL::Texture *FrameBufferTexture = nullptr; - MTL::CommandBuffer *CmdBuffer = nullptr; + std::unique_ptr CB; }; llvm::Error 
setupVertexShader(InvocationState &IS, const Pipeline &P, @@ -368,10 +409,8 @@ class MTLDevice : public offloadtest::Device { } llvm::Error createComputeCommands(Pipeline &P, InvocationState &IS) { - IS.CmdBuffer = GraphicsQueue.Queue->commandBuffer(); - MTL::ComputeCommandEncoder *CmdEncoder = - IS.CmdBuffer->computeCommandEncoder(); + IS.CB->CmdBuffer->computeCommandEncoder(); CmdEncoder->setComputePipelineState(IS.ComputePipeline); CmdEncoder->setBuffer(IS.ArgBuffer, 0, 2); @@ -436,8 +475,6 @@ class MTLDevice : public offloadtest::Device { } llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { - IS.CmdBuffer = GraphicsQueue.Queue->commandBuffer(); - MTL::RenderPassDescriptor *Desc = MTL::RenderPassDescriptor::alloc()->init(); @@ -467,7 +504,7 @@ class MTLDevice : public offloadtest::Device { Desc->colorAttachments()->setObject(CADesc, 0); MTL::RenderCommandEncoder *CmdEncoder = - IS.CmdBuffer->renderCommandEncoder(Desc); + IS.CB->CmdBuffer->renderCommandEncoder(Desc); CmdEncoder->setRenderPipelineState(IS.RenderPipeline); // Explicitly set viewport to texture dimensions. @@ -488,15 +525,7 @@ class MTLDevice : public offloadtest::Device { } llvm::Error executeCommands(InvocationState &IS) { - IS.CmdBuffer->commit(); - IS.CmdBuffer->waitUntilCompleted(); - - // Check and surface any errors that occurred during execution. 
- NS::Error *CBErr = IS.CmdBuffer->error(); - if (CBErr) - return toError(CBErr); - - return llvm::Error::success(); + return GraphicsQueue.submit(std::move(IS.CB)); } llvm::Error copyBack(Pipeline &P, InvocationState &IS) { @@ -586,9 +615,19 @@ class MTLDevice : public offloadtest::Device { return std::make_shared(Buf, Name, Desc, SizeInBytes); } + llvm::Expected> + createCommandBuffer() override { + return MTLCommandBuffer::create(GraphicsQueue.Queue); + } + llvm::Error executeProgram(Pipeline &P) override { InvocationState IS; + auto CBOrErr = MTLCommandBuffer::create(GraphicsQueue.Queue); + if (!CBOrErr) + return CBOrErr.takeError(); + IS.CB = std::move(*CBOrErr); + if (auto Err = createBuffers(P, IS)) return Err; diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 5c78b8afa..aab76529a 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -393,12 +393,101 @@ class VulkanBuffer : public offloadtest::Buffer { class VulkanQueue : public offloadtest::Queue { public: + using Queue::submit; + VkQueue Queue = VK_NULL_HANDLE; uint32_t QueueFamilyIdx = 0; - VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx) - : Queue(Q), QueueFamilyIdx(QueueFamilyIdx) {} + // TODO: Ensure device lifetime is managed (e.g. via shared_ptr). + VkDevice Device = VK_NULL_HANDLE; + VulkanQueue(VkQueue Q, uint32_t QueueFamilyIdx, VkDevice Device) + : Queue(Q), QueueFamilyIdx(QueueFamilyIdx), Device(Device) {} + + llvm::Error + submit(llvm::SmallVector> CBs) + override; }; +class VulkanCommandBuffer : public offloadtest::CommandBuffer { +public: + static constexpr GPUAPI BackendAPI = GPUAPI::Vulkan; + + VkDevice Device = VK_NULL_HANDLE; + // Owned per command buffer so that recording, submission, and lifetime + // management of each command buffer are independently safe without external + // synchronization. 
+ VkCommandPool CmdPool = VK_NULL_HANDLE; + VkCommandBuffer CmdBuffer = VK_NULL_HANDLE; + + static llvm::Expected> + create(VkDevice Device, uint32_t QueueFamilyIdx) { + auto CB = std::unique_ptr(new VulkanCommandBuffer()); + CB->Device = Device; + + VkCommandPoolCreateInfo CmdPoolInfo = {}; + CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + CmdPoolInfo.queueFamilyIndex = QueueFamilyIdx; + CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + if (vkCreateCommandPool(Device, &CmdPoolInfo, nullptr, &CB->CmdPool)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not create command pool."); + + VkCommandBufferAllocateInfo CBufAllocInfo = {}; + CBufAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + CBufAllocInfo.commandPool = CB->CmdPool; + CBufAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + CBufAllocInfo.commandBufferCount = 1; + if (vkAllocateCommandBuffers(Device, &CBufAllocInfo, &CB->CmdBuffer)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not create command buffer."); + + VkCommandBufferBeginInfo BufferInfo = {}; + BufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + if (vkBeginCommandBuffer(CB->CmdBuffer, &BufferInfo)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not begin command buffer."); + return CB; + } + + ~VulkanCommandBuffer() override { + if (CmdPool != VK_NULL_HANDLE) + vkDestroyCommandPool(Device, CmdPool, nullptr); + } + +private: + VulkanCommandBuffer() : CommandBuffer(GPUAPI::Vulkan) {} +}; + +llvm::Error VulkanQueue::submit( + llvm::SmallVector> CBs) { + for (auto &CB : CBs) { + auto &VCB = CB->as(); + if (vkEndCommandBuffer(VCB.CmdBuffer)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not end command buffer."); + + VkSubmitInfo SubmitInfo = {}; + SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + SubmitInfo.commandBufferCount = 1; + 
SubmitInfo.pCommandBuffers = &VCB.CmdBuffer; + + VkFenceCreateInfo FenceInfo = {}; + FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VkFence Fence; + if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Could not create fence."); + + if (vkQueueSubmit(Queue, 1, &SubmitInfo, Fence)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to submit to queue."); + if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed waiting for fence."); + + vkDestroyFence(Device, Fence, nullptr); + } + return llvm::Error::success(); +} class VulkanDevice : public offloadtest::Device { private: std::shared_ptr Instance; @@ -480,8 +569,7 @@ class VulkanDevice : public offloadtest::Device { }; struct InvocationState { - VkCommandPool CmdPool = VK_NULL_HANDLE; - VkCommandBuffer CmdBuffer = VK_NULL_HANDLE; + std::unique_ptr CB; VkPipelineLayout PipelineLayout = VK_NULL_HANDLE; VkDescriptorPool Pool = VK_NULL_HANDLE; VkPipelineCache PipelineCache = VK_NULL_HANDLE; @@ -596,7 +684,8 @@ class VulkanDevice : public offloadtest::Device { VkQueue DeviceQueue = VK_NULL_HANDLE; vkGetDeviceQueue(Device, QueueFamilyIdx, 0, &DeviceQueue); - const VulkanQueue GraphicsQueue = VulkanQueue(DeviceQueue, QueueFamilyIdx); + const VulkanQueue GraphicsQueue = + VulkanQueue(DeviceQueue, QueueFamilyIdx, Device); return std::make_unique(Instance, PhysicalDevice, Props, Device, std::move(GraphicsQueue), @@ -791,33 +880,9 @@ class VulkanDevice : public offloadtest::Device { } public: - llvm::Error createDevice(InvocationState &IS) { - VkCommandPoolCreateInfo CmdPoolInfo = {}; - CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - CmdPoolInfo.queueFamilyIndex = GraphicsQueue.QueueFamilyIdx; - CmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - - if (vkCreateCommandPool(Device, &CmdPoolInfo, 
nullptr, &IS.CmdPool)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not create command pool."); - return llvm::Error::success(); - } - - llvm::Error createCommandBuffer(InvocationState &IS) { - VkCommandBufferAllocateInfo CBufAllocInfo = {}; - CBufAllocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - CBufAllocInfo.commandPool = IS.CmdPool; - CBufAllocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - CBufAllocInfo.commandBufferCount = 1; - if (vkAllocateCommandBuffers(Device, &CBufAllocInfo, &IS.CmdBuffer)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not create command buffer."); - VkCommandBufferBeginInfo BufferInfo = {}; - BufferInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - if (vkBeginCommandBuffer(IS.CmdBuffer, &BufferInfo)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not begin command buffer."); - return llvm::Error::success(); + llvm::Expected> + createCommandBuffer() override { + return VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); } llvm::Expected createBuffer(VkBufferUsageFlags Usage, @@ -1003,8 +1068,8 @@ class VulkanDevice : public offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = R.size(); - vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExHostBuf->Buffer, + ExDeviceBuf->Buffer, 1, &Copy); Bundle.ResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf); } } @@ -1026,8 +1091,8 @@ class VulkanDevice : public offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = sizeof(uint32_t); - vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExHostBuf->Buffer, + ExDeviceBuf->Buffer, 1, &Copy); Bundle.CounterResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf); } } @@ -1146,8 +1211,8 @@ class VulkanDevice : public 
offloadtest::Device { return ExDeviceBuf.takeError(); VkBufferCopy Copy = {}; Copy.size = VertexBuffer.size(); - vkCmdCopyBuffer(IS.CmdBuffer, ExVHostBuf->Buffer, ExDeviceBuf->Buffer, 1, - &Copy); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ExVHostBuf->Buffer, ExDeviceBuf->Buffer, + 1, &Copy); IS.VertexBuffer = ResourceRef(*ExVHostBuf, *ExDeviceBuf); } @@ -1155,32 +1220,7 @@ class VulkanDevice : public offloadtest::Device { } llvm::Error executeCommandBuffer(InvocationState &IS) { - if (vkEndCommandBuffer(IS.CmdBuffer)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not end command buffer."); - - VkSubmitInfo SubmitInfo = {}; - SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - SubmitInfo.commandBufferCount = 1; - SubmitInfo.pCommandBuffers = &IS.CmdBuffer; - VkFenceCreateInfo FenceInfo = {}; - FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - VkFence Fence; - if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Could not create fence."); - - // Submit to the queue - if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Failed to submit to queue."); - if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Failed waiting for fence."); - - vkDestroyFence(Device, Fence, nullptr); - vkFreeCommandBuffers(Device, IS.CmdPool, 1, &IS.CmdBuffer); - return llvm::Error::success(); + return GraphicsQueue.submit(std::move(IS.CB)); } llvm::Error createDescriptorPool(Pipeline &P, InvocationState &IS) { @@ -1939,11 +1979,11 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 
0, nullptr, 0, nullptr, 1, &ImageBarrier); - vkCmdCopyBufferToImage(IS.CmdBuffer, ResRef.Host.Buffer, + vkCmdCopyBufferToImage(IS.CB->CmdBuffer, ResRef.Host.Buffer, ResRef.Image.Image, VK_IMAGE_LAYOUT_GENERAL, Regions.size(), Regions.data()); } @@ -1957,7 +1997,7 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 0, nullptr, 1, &ImageBarrier); } @@ -1972,7 +2012,7 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } @@ -2003,7 +2043,8 @@ class VulkanDevice : public offloadtest::Device { for (auto &ResRef : R.ResourceRefs) { ImageBarrier.image = ResRef.Image.Image; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &ImageBarrier); } @@ -2032,7 +2073,7 @@ class VulkanDevice : public offloadtest::Device { } for (auto &ResRef : R.ResourceRefs) - vkCmdCopyImageToBuffer(IS.CmdBuffer, ResRef.Image.Image, + vkCmdCopyImageToBuffer(IS.CB->CmdBuffer, ResRef.Image.Image, VK_IMAGE_LAYOUT_GENERAL, ResRef.Host.Buffer, Regions.size(), Regions.data()); @@ -2045,7 +2086,7 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, 
VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } @@ -2061,21 +2102,22 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } VkBufferCopy CopyRegion = {}; CopyRegion.size = R.size(); for (auto &ResRef : R.ResourceRefs) - vkCmdCopyBuffer(IS.CmdBuffer, ResRef.Device.Buffer, ResRef.Host.Buffer, 1, - &CopyRegion); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ResRef.Device.Buffer, + ResRef.Host.Buffer, 1, &CopyRegion); VkBufferCopy CounterCopyRegion = {}; CounterCopyRegion.size = sizeof(uint32_t); for (auto &ResRef : R.CounterResourceRefs) - vkCmdCopyBuffer(IS.CmdBuffer, ResRef.Device.Buffer, ResRef.Host.Buffer, 1, - &CounterCopyRegion); + vkCmdCopyBuffer(IS.CB->CmdBuffer, ResRef.Device.Buffer, + ResRef.Host.Buffer, 1, &CounterCopyRegion); Barrier.size = VK_WHOLE_SIZE; Barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; @@ -2084,13 +2126,13 @@ class VulkanDevice : public offloadtest::Device { Barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; for (auto &ResRef : R.ResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, &Barrier, 0, nullptr); } for (auto &ResRef : R.CounterResourceRefs) { Barrier.buffer = ResRef.Host.Buffer; - vkCmdPipelineBarrier(IS.CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + vkCmdPipelineBarrier(IS.CB->CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, 
&Barrier, 0, nullptr); } @@ -2116,7 +2158,7 @@ class VulkanDevice : public offloadtest::Device { RenderPassBeginInfo.clearValueCount = 2; RenderPassBeginInfo.pClearValues = ClearValues; - vkCmdBeginRenderPass(IS.CmdBuffer, &RenderPassBeginInfo, + vkCmdBeginRenderPass(IS.CB->CmdBuffer, &RenderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE); VkViewport Viewport = {}; @@ -2128,28 +2170,28 @@ class VulkanDevice : public offloadtest::Device { static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height); Viewport.minDepth = 0.0f; Viewport.maxDepth = 1.0f; - vkCmdSetViewport(IS.CmdBuffer, 0, 1, &Viewport); + vkCmdSetViewport(IS.CB->CmdBuffer, 0, 1, &Viewport); VkRect2D Scissor = {}; Scissor.offset = {0, 0}; Scissor.extent.width = P.Bindings.RTargetBufferPtr->OutputProps.Width; Scissor.extent.height = P.Bindings.RTargetBufferPtr->OutputProps.Height; - vkCmdSetScissor(IS.CmdBuffer, 0, 1, &Scissor); + vkCmdSetScissor(IS.CB->CmdBuffer, 0, 1, &Scissor); } const VkPipelineBindPoint BindPoint = P.isGraphics() ? 
VK_PIPELINE_BIND_POINT_GRAPHICS : VK_PIPELINE_BIND_POINT_COMPUTE; - vkCmdBindPipeline(IS.CmdBuffer, BindPoint, IS.Pipeline); + vkCmdBindPipeline(IS.CB->CmdBuffer, BindPoint, IS.Pipeline); if (IS.DescriptorSets.size() > 0) - vkCmdBindDescriptorSets(IS.CmdBuffer, BindPoint, IS.PipelineLayout, 0, + vkCmdBindDescriptorSets(IS.CB->CmdBuffer, BindPoint, IS.PipelineLayout, 0, IS.DescriptorSets.size(), IS.DescriptorSets.data(), 0, 0); for (const auto &PCB : P.PushConstants) { llvm::SmallVector Data; PCB.getContent(Data); - vkCmdPushConstants(IS.CmdBuffer, IS.PipelineLayout, + vkCmdPushConstants(IS.CB->CmdBuffer, IS.PipelineLayout, getShaderStageFlag(PCB.Stage), 0, Data.size(), Data.data()); } @@ -2157,19 +2199,19 @@ class VulkanDevice : public offloadtest::Device { if (P.isCompute()) { const llvm::ArrayRef DispatchSize = llvm::ArrayRef(P.Shaders[0].DispatchSize); - vkCmdDispatch(IS.CmdBuffer, DispatchSize[0], DispatchSize[1], + vkCmdDispatch(IS.CB->CmdBuffer, DispatchSize[0], DispatchSize[1], DispatchSize[2]); llvm::outs() << "Dispatched compute shader: { " << DispatchSize[0] << ", " << DispatchSize[1] << ", " << DispatchSize[2] << " }\n"; } else { VkDeviceSize Offsets[1]{0}; assert(IS.VertexBuffer.has_value()); - vkCmdBindVertexBuffers(IS.CmdBuffer, 0, 1, + vkCmdBindVertexBuffers(IS.CB->CmdBuffer, 0, 1, &IS.VertexBuffer->Device.Buffer, Offsets); // instanceCount must be >=1 to draw; previously was 0 which draws nothing - vkCmdDraw(IS.CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0); + vkCmdDraw(IS.CB->CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0); llvm::outs() << "Drew " << P.Bindings.getVertexCount() << " vertices.\n"; - vkCmdEndRenderPass(IS.CmdBuffer); + vkCmdEndRenderPass(IS.CB->CmdBuffer); copyResourceDataToHost(IS, IS.FrameBufferResource); } @@ -2313,9 +2355,6 @@ class VulkanDevice : public offloadtest::Device { if (IS.Pool) vkDestroyDescriptorPool(Device, IS.Pool, nullptr); - - if (IS.CmdPool) - vkDestroyCommandPool(Device, IS.CmdPool, nullptr); } llvm::Error 
executeProgram(Pipeline &P) override { @@ -2325,14 +2364,15 @@ class VulkanDevice : public offloadtest::Device { llvm::outs() << "Cleanup complete.\n"; }); - if (auto Err = createDevice(State)) - return Err; - llvm::outs() << "Physical device created.\n"; + auto CBOrErr = + VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); + if (!CBOrErr) + return CBOrErr.takeError(); + State.CB = std::move(*CBOrErr); + llvm::outs() << "Command buffer created.\n"; if (auto Err = createShaderModules(P, State)) return Err; llvm::outs() << "Shader module created.\n"; - if (auto Err = createCommandBuffer(State)) - return Err; llvm::outs() << "Copy command buffer created.\n"; if (auto Err = createResources(P, State)) return Err; @@ -2348,8 +2388,11 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = executeCommandBuffer(State)) return Err; llvm::outs() << "Executed copy command buffer.\n"; - if (auto Err = createCommandBuffer(State)) - return Err; + auto DispatchCBOrErr = + VulkanCommandBuffer::create(Device, GraphicsQueue.QueueFamilyIdx); + if (!DispatchCBOrErr) + return DispatchCBOrErr.takeError(); + State.CB = std::move(*DispatchCBOrErr); llvm::outs() << "Execute command buffer created.\n"; if (auto Err = createDescriptorPool(P, State)) return Err;