From 1286c7e12384157f87193582ee0c68d122e55dbe Mon Sep 17 00:00:00 2001
From: Manon Oomen
Date: Fri, 20 Mar 2026 17:27:50 +0100
Subject: [PATCH 1/3] Implement fence creation in DX and VK.

---
Reviewer notes (free text after the "---" marker; not part of the commit
message):

 * Purpose: introduces the abstract Fence interface (getFenceValue,
   waitForCompletion) and the pure-virtual Device::createFence, plus the
   DXFence and VulkanFence implementations and their createFence overrides.
 * DXFence::waitForCompletion returns success at once when
   GetCompletedValue() >= SignalValue; otherwise it registers Event through
   SetEventOnCompletion and blocks in WaitForSingleObject (_WIN32) or poll()
   (the "#else // WSL" branch). The DXFence destructor closes Event.
 * VulkanFence wraps a semaphore created with VK_SEMAPHORE_TYPE_TIMELINE and
   initialValue 0; waitForCompletion calls vkWaitSemaphores with a UINT64_MAX
   timeout and maps the VkResult to an llvm::Error.
 * NOTE(review): template-argument lists look to be missing throughout this
   copy of the series (for example "llvm::Expected>", "std::make_shared(...)"
   and "ComPtr Fence"), and the author address is absent; presumably lost in
   extraction. Restore them from the original commits before applying.
 * NOTE(review): DXFence and VulkanFence declare Name first but list it last
   in the constructor's mem-initializer list. Members are initialized in
   declaration order, so this may trigger a reorder warning.
 * NOTE(review): in the WSL branch createFence keeps the eventfd in an int and
   passes it where the DXFence constructor takes a HANDLE, and DXFence
   converts back with "(int)Event". The code deleted by PATCH 2/3 used
   reinterpret_cast for the int-to-HANDLE direction; confirm that these
   implicit / C-style conversions build there.
 * NOTE(review): the result of WaitForSingleObject is ignored, and the eventfd
   is not read after poll() returns; confirm the event cannot stay signalled
   for the next wait.
 * NOTE(review): VulkanFence has no destructor in this patch (one is added in
   PATCH 2/3), so at this commit the semaphore from createFence is never
   passed to vkDestroySemaphore.
 * NOTE(review): the catch-all "Result != VK_SUCCESS" branch of
   VulkanFence::waitForCompletion reports std::errc::not_enough_memory, the
   same code as the two out-of-memory branches, and VulkanDevice::createFence
   still carries a todo marker on its error code. getFenceValue checks its
   VkResult only through assert().

 include/API/Device.h  | 10 +++++++
 lib/API/DX/Device.cpp | 65 +++++++++++++++++++++++++++++++++++++++++++
 lib/API/VK/Device.cpp | 64 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+)

diff --git a/include/API/Device.h b/include/API/Device.h
index 75816432f..be45bbd84 100644
--- a/include/API/Device.h
+++ b/include/API/Device.h
@@ -56,6 +56,13 @@ class Buffer {
   Buffer() = default;
 };

+class Fence {
+public:
+  virtual ~Fence() {}
+  virtual uint64_t getFenceValue() = 0;
+  virtual llvm::Error waitForCompletion(uint64_t SignalValue) = 0;
+};
+
 class Queue {
 public:
   virtual ~Queue() = 0;
@@ -77,6 +84,9 @@ class Device {

   virtual Queue &getGraphicsQueue() = 0;

+  virtual llvm::Expected>
+  createFence(llvm::StringRef Name) = 0;
+
   virtual llvm::Expected>
   createBuffer(std::string Name, BufferCreateDesc &Desc,
                size_t SizeInBytes) = 0;
diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp
index cddccbc93..cd6072fc5 100644
--- a/lib/API/DX/Device.cpp
+++ b/lib/API/DX/Device.cpp
@@ -289,6 +289,50 @@ class DXBuffer : public offloadtest::Buffer {
       : Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {}
 };

+class DXFence : public offloadtest::Fence {
+public:
+  std::string Name;
+  ComPtr Fence;
+  HANDLE Event;
+
+  uint64_t getFenceValue() override { return Fence->GetCompletedValue(); }
+
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+
+    if (Fence->GetCompletedValue() >= SignalValue) {
+      return llvm::Error::success();
+    }
+
+    if (auto Err = HR::toError(Fence->SetEventOnCompletion(SignalValue, Event),
+                               "Failed to register end event."))
+      return Err;
+
+#ifdef _WIN32
+    WaitForSingleObject(Event, INFINITE);
+#else // WSL
+    pollfd PollEvent;
+    PollEvent.fd = (int)Event;
+    PollEvent.events = POLLIN;
+    PollEvent.revents = 0;
+    if (poll(&PollEvent, 1, -1) == -1)
+      return llvm::createStringError(
+          std::error_code(errno, std::system_category()), strerror(errno));
+#endif
+    return llvm::Error::success();
+  }
+
+  DXFence(ComPtr Fence, HANDLE Event, llvm::StringRef Name)
+      : Fence(Fence), Event(Event), Name(Name) {}
+
+  ~DXFence() {
+#ifdef _WIN32
+    CloseHandle(Event);
+#else // WSL
+    close((int)Event);
+#endif
+  }
+};
+
 class DXQueue : public offloadtest::Queue {
 public:
   ComPtr Queue;
@@ -374,6 +418,27 @@ class DXDevice : public offloadtest::Device {

   Queue &getGraphicsQueue() override { return GraphicsQueue; }

+  llvm::Expected>
+  createFence(llvm::StringRef Name) override {
+    ComPtr Fence;
+    if (auto Err = HR::toError(
+            Device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&Fence)),
+            "Failed to create Fence."))
+      return Err;
+
+#ifdef _WIN32
+    HANDLE Event = CreateEventA(nullptr, false, false, nullptr);
+    if (!Event)
+#else // WSL
+    int Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
+    if (Event == -1)
+#endif
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to create event.");
+
+    return std::make_shared(Fence, Event, Name);
+  }
+
   llvm::Expected>
   createBuffer(std::string Name, BufferCreateDesc &Desc,
                size_t SizeInBytes) override {
diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp
index bd80fd210..73ce5823b 100644
--- a/lib/API/VK/Device.cpp
+++ b/lib/API/VK/Device.cpp
@@ -356,6 +356,47 @@ class VulkanBuffer : public offloadtest::Buffer {
   }
 };

+class VulkanFence : public offloadtest::Fence {
+public:
+  std::string Name;
+  VkDevice Device;
+  VkSemaphore Semaphore;
+
+  uint64_t getFenceValue() override {
+    uint64_t Value = 0;
+    const VkResult Result =
+        vkGetSemaphoreCounterValue(Device, Semaphore, &Value);
+    assert(Result == VK_SUCCESS);
+    return Value;
+  }
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+    VkSemaphoreWaitInfo WaitInfo = {};
+    WaitInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO;
+    WaitInfo.semaphoreCount = 1;
+    WaitInfo.pSemaphores = &Semaphore;
+    WaitInfo.pValues = &SignalValue;
+
+    const VkResult Result = vkWaitSemaphores(Device, &WaitInfo, UINT64_MAX);
+
+    if (Result == VK_ERROR_DEVICE_LOST)
+      return llvm::createStringError(std::errc::no_such_device, "Device Lost.");
+    if (Result == VK_ERROR_OUT_OF_DEVICE_MEMORY)
+      return llvm::createStringError(std::errc::not_enough_memory,
+                                     "Out of Device Memory.");
+    if (Result == VK_ERROR_OUT_OF_HOST_MEMORY)
+      return llvm::createStringError(std::errc::not_enough_memory,
+                                     "Out of Host Memory.");
+    if (Result != VK_SUCCESS)
+      return llvm::createStringError(std::errc::not_enough_memory,
+                                     "Failed to wait on Semaphore.");
+
+    return llvm::Error::success();
+  }
+
+  VulkanFence(VkDevice Device, VkSemaphore Semaphore, llvm::StringRef Name)
+      : Device(Device), Semaphore(Semaphore), Name(Name) {}
+};
+
 class VulkanQueue : public offloadtest::Queue {
 public:
   VkQueue Queue = VK_NULL_HANDLE;
@@ -612,6 +653,29 @@ class VulkanDevice : public offloadtest::Device {

   Queue &getGraphicsQueue() override { return GraphicsQueue; }

+  llvm::Expected>
+  createFence(llvm::StringRef Name) override {
+    VkSemaphoreTypeCreateInfo TypeCreateInfo = {};
+    TypeCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO;
+    TypeCreateInfo.pNext = nullptr;
+    TypeCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE;
+    TypeCreateInfo.initialValue = 0;
+
+    VkSemaphoreCreateInfo CreateInfo = {};
+    CreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    CreateInfo.pNext = &TypeCreateInfo;
+
+    VkSemaphore Semaphore = VK_NULL_HANDLE;
+    const VkResult Result =
+        vkCreateSemaphore(Device, &CreateInfo, nullptr, &Semaphore);
+    if (Result != VK_SUCCESS) {
+      return llvm::createStringError(std::errc::invalid_argument /*todo*/,
+                                     "Failed to create Semaphore");
+    }
+
+    return std::make_shared(Device, Semaphore, Name);
+  }
+
   llvm::Expected>
   createBuffer(std::string Name, BufferCreateDesc &Desc,
                size_t SizeInBytes) override {

From d312da4f43833fbd0f2ad0909f6d9e7e61b063cb Mon Sep 17 00:00:00 2001
From: Manon Oomen
Date: Tue, 24 Mar 2026 15:36:51 +0100
Subject: [PATCH 2/3] Use the new Fence type in both DX and Vulkan backends.

---
Reviewer notes (free text after the "---" marker; not part of the commit
message):

 * Purpose: InvocationState in both backends now holds a fence obtained from
   createFence("Fence"). This replaces the DX ComPtr fence + event members
   (createEvent is deleted) and the VkFence that executeCommandBuffer created
   and destroyed on every submit.
 * DX waitForSignal still signals the queue itself, using the D3D12 fence
   reached through a static_cast of IS.Fence, then delegates the blocking wait
   to waitForCompletion.
 * VK executeCommandBuffer chains a VkTimelineSemaphoreSubmitInfo into
   VkSubmitInfo so the submit signals the fence's semaphore with
   CurrentCounter, passes VK_NULL_HANDLE as the submit fence, and waits
   through waitForCompletion.
 * Also adds the VulkanFence destructor, which calls vkDestroySemaphore.
 * NOTE(review): FenceCounter is a function-local static, while the fence
   appears to be created once per InvocationState. The counter is only
   advanced after a successful wait, so a failed wait leaves the next signal
   value unchanged. Confirm both are intended (the in-code comment already
   flags the counter as a hack that relies on single-threaded use).
 * NOTE(review): the static_cast to the backend fence type assumes IS.Fence
   always comes from the same device's createFence.
 * NOTE(review): the VulkanFence destructor uses Device, so Device must still
   be valid when the last reference to the fence is dropped; verify the
   teardown order.
 * NOTE(review): DX drops the "Event prepared." progress message without a
   replacement; on VK the fence is created before "Physical device created."
   is printed.

 lib/API/DX/Device.cpp | 58 +++++++------------------------------------
 lib/API/VK/Device.cpp | 41 ++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 60 deletions(-)

diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp
index cd6072fc5..51e24ab47 100644
--- a/lib/API/DX/Device.cpp
+++ b/lib/API/DX/Device.cpp
@@ -386,12 +386,7 @@ class DXDevice : public offloadtest::Device {
     ComPtr PSO;
     ComPtr Allocator;
     ComPtr CmdList;
-    ComPtr Fence;
-#ifdef _WIN32
-    HANDLE Event;
-#else // WSL
-    int Event;
-#endif
+    std::shared_ptr Fence;

     // Resources for graphics pipelines.
     ComPtr RT;
@@ -1197,56 +1192,20 @@ class DXDevice : public offloadtest::Device {
     IS.CmdList->ResourceBarrier(1, &Barrier);
   }

-  llvm::Error createEvent(InvocationState &IS) {
-    if (auto Err = HR::toError(Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
-                                                   IID_PPV_ARGS(&IS.Fence)),
-                               "Failed to create fence."))
-      return Err;
-#ifdef _WIN32
-    IS.Event = CreateEventA(nullptr, false, false, nullptr);
-    if (!IS.Event)
-#else // WSL
-    IS.Event = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
-    if (IS.Event == -1)
-#endif
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Failed to create event.");
-    return llvm::Error::success();
-  }
-
   llvm::Error waitForSignal(InvocationState &IS) {
     // This is a hack but it works since this is all single threaded code.
     static uint64_t FenceCounter = 0;
     const uint64_t CurrentCounter = FenceCounter + 1;
+    auto *F = static_cast(IS.Fence.get());

     if (auto Err = HR::toError(
-            GraphicsQueue.Queue->Signal(IS.Fence.Get(), CurrentCounter),
+            GraphicsQueue.Queue->Signal(F->Fence.Get(), CurrentCounter),
             "Failed to add signal."))
       return Err;

-    if (IS.Fence->GetCompletedValue() < CurrentCounter) {
-#ifdef _WIN32
-      HANDLE Event = IS.Event;
-#else // WSL
-      HANDLE Event = reinterpret_cast(IS.Event);
-#endif
-      if (auto Err =
-              HR::toError(IS.Fence->SetEventOnCompletion(CurrentCounter, Event),
-                          "Failed to register end event."))
-        return Err;
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;

-#ifdef _WIN32
-      WaitForSingleObject(IS.Event, INFINITE);
-#else // WSL
-      pollfd PollEvent;
-      PollEvent.fd = IS.Event;
-      PollEvent.events = POLLIN;
-      PollEvent.revents = 0;
-      if (poll(&PollEvent, 1, -1) == -1)
-        return llvm::createStringError(
-            std::error_code(errno, std::system_category()), strerror(errno));
-#endif
-    }
     FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }
@@ -1751,9 +1710,10 @@ class DXDevice : public offloadtest::Device {
       return Err;
     llvm::outs() << "Command structures created.\n";

-    if (auto Err = createEvent(State))
-      return Err;
-    llvm::outs() << "Event prepared.\n";
+    auto FenceOrErr = this->createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    State.Fence = *FenceOrErr;

     if (auto Err = createBuffers(P, State))
       return Err;
diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp
index 73ce5823b..85813f419 100644
--- a/lib/API/VK/Device.cpp
+++ b/lib/API/VK/Device.cpp
@@ -395,6 +395,8 @@ class VulkanFence : public offloadtest::Fence {

   VulkanFence(VkDevice Device, VkSemaphore Semaphore, llvm::StringRef Name)
       : Device(Device), Semaphore(Semaphore), Name(Name) {}
+
+  ~VulkanFence() { vkDestroySemaphore(Device, Semaphore, nullptr); }
 };

 class VulkanQueue : public offloadtest::Queue {
@@ -492,6 +494,8 @@ class VulkanDevice : public offloadtest::Device {
     VkPipelineCache PipelineCache;
     VkPipeline Pipeline;

+    std::shared_ptr Fence;
+
     // FrameBuffer associated data for offscreen rendering.
     VkFramebuffer FrameBuffer;
     ResourceBundle FrameBufferResource = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0,
@@ -1172,32 +1176,41 @@ class VulkanDevice : public offloadtest::Device {

   llvm::Error executeCommandBuffer(InvocationState &IS,
                                    VkPipelineStageFlags WaitMask = 0) {
+    // This is a hack but it works since this is all single threaded code.
+    static uint64_t FenceCounter = 0;
+    const uint64_t CurrentCounter = FenceCounter + 1;
+
     if (vkEndCommandBuffer(IS.CmdBuffer))
       return llvm::createStringError(std::errc::device_or_resource_busy,
                                      "Could not end command buffer.");

+    auto *F = static_cast(IS.Fence.get());
+
+    VkTimelineSemaphoreSubmitInfo TimelineSubmitInfo = {};
+    TimelineSubmitInfo.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO;
+    TimelineSubmitInfo.signalSemaphoreValueCount = 1;
+    TimelineSubmitInfo.pSignalSemaphoreValues = &CurrentCounter;
+
     VkSubmitInfo SubmitInfo = {};
     SubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    SubmitInfo.pNext = &TimelineSubmitInfo;
     SubmitInfo.commandBufferCount = 1;
     SubmitInfo.pCommandBuffers = &IS.CmdBuffer;
     SubmitInfo.pWaitDstStageMask = &WaitMask;
-    VkFenceCreateInfo FenceInfo = {};
-    FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    VkFence Fence;
-    if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence))
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Could not create fence.");
+    SubmitInfo.signalSemaphoreCount = 1;
+    SubmitInfo.pSignalSemaphores = &F->Semaphore;

     // Submit to the queue
-    if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, Fence))
+    if (vkQueueSubmit(GraphicsQueue.Queue, 1, &SubmitInfo, VK_NULL_HANDLE))
       return llvm::createStringError(std::errc::device_or_resource_busy,
                                      "Failed to submit to queue.");

-    if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX))
-      return llvm::createStringError(std::errc::device_or_resource_busy,
-                                     "Failed waiting for fence.");
-    vkDestroyFence(Device, Fence, nullptr);
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;
+
     vkFreeCommandBuffers(Device, IS.CmdPool, 1, &IS.CmdBuffer);
+
+    FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }

@@ -2333,6 +2346,12 @@ class VulkanDevice : public offloadtest::Device {
     InvocationState State;
     if (auto Err = createDevice(State))
       return Err;
+
+    auto FenceOrErr = this->createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    State.Fence = *FenceOrErr;
+
     llvm::outs() << "Physical device created.\n";
     if (auto Err = createShaderModules(P, State))
       return Err;

From 41f208f23c9134417b72db406a66a1152c619ec2 Mon Sep 17 00:00:00 2001
From: Manon Oomen
Date: Tue, 24 Mar 2026 16:05:52 +0100
Subject: [PATCH 3/3] Add a Metal implementation.

---
Reviewer notes (free text after the "---" marker; not part of the commit
message):

 * Purpose: adds MTLFence (wrapping an MTL::SharedEvent) and
   MTLDevice::createFence, and switches executeCommands from
   CmdBuffer->waitUntilCompleted() to encoding a signal on the shared event
   before commit() and then waiting on the fence.
 * MTLFence::waitForCompletion maps a false return from
   waitUntilSignaledValue(SignalValue, UINT64_MAX) to std::errc::timed_out.
   The destructor releases the event when it is non-null.
 * NOTE(review): IS.CmdBuffer->error() is now read after the shared-event wait
   instead of after waitUntilCompleted(). Confirm that the event is still
   signalled when the command buffer fails (otherwise the UINT64_MAX wait
   would not return) and that the error state is final by then.
 * NOTE(review): executeCommands uses the same function-local static
   FenceCounter scheme as the DX and Vulkan changes in PATCH 2/3, and is only
   advanced after both the wait and the error check succeed.

 lib/API/MTL/MTLDevice.cpp | 49 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp
index 817471c55..e1c7269bd 100644
--- a/lib/API/MTL/MTLDevice.cpp
+++ b/lib/API/MTL/MTLDevice.cpp
@@ -83,6 +83,29 @@ class MTLQueue : public offloadtest::Queue {
   }
 };

+class MTLFence : public offloadtest::Fence {
+public:
+  std::string Name;
+  MTL::SharedEvent *Event;
+
+  uint64_t getFenceValue() override { return Event->signaledValue(); }
+
+  llvm::Error waitForCompletion(uint64_t SignalValue) override {
+    if (!Event->waitUntilSignaledValue(SignalValue, UINT64_MAX))
+      return llvm::createStringError(std::errc::timed_out,
+                                     "Timed out waiting on shared event.");
+    return llvm::Error::success();
+  }
+
+  MTLFence(MTL::SharedEvent *Event, llvm::StringRef Name)
+      : Name(Name), Event(Event) {}
+
+  ~MTLFence() {
+    if (Event)
+      Event->release();
+  }
+};
+
 class MTLBuffer : public offloadtest::Buffer {
 public:
   MTL::Buffer *Buf;
@@ -130,6 +153,7 @@ class MTLDevice : public offloadtest::Device {
     llvm::SmallVector Buffers;
     MTL::Texture *FrameBufferTexture = nullptr;
     MTL::CommandBuffer *CmdBuffer = nullptr;
+    std::shared_ptr Fence;
   };

   llvm::Error setupVertexShader(InvocationState &IS, const Pipeline &P,
@@ -487,14 +511,23 @@ class MTLDevice : public offloadtest::Device {
   }

   llvm::Error executeCommands(InvocationState &IS) {
+    // This is a hack but it works since this is all single threaded code.
+    static uint64_t FenceCounter = 0;
+    const uint64_t CurrentCounter = FenceCounter + 1;
+    auto *F = static_cast(IS.Fence.get());
+
+    IS.CmdBuffer->encodeSignalEvent(F->Event, CurrentCounter);
     IS.CmdBuffer->commit();
-    IS.CmdBuffer->waitUntilCompleted();
+
+    if (auto Err = IS.Fence->waitForCompletion(CurrentCounter))
+      return Err;

     // Check and surface any errors that occurred during execution.
     NS::Error *CBErr = IS.CmdBuffer->error();
     if (CBErr)
       return toError(CBErr);

+    FenceCounter = CurrentCounter;
     return llvm::Error::success();
   }

@@ -564,6 +597,15 @@ class MTLDevice : public offloadtest::Device {

   Queue &getGraphicsQueue() override { return GraphicsQueue; }

+  llvm::Expected>
+  createFence(llvm::StringRef Name) override {
+    MTL::SharedEvent *Event = Device->newSharedEvent();
+    if (!Event)
+      return llvm::createStringError(std::errc::device_or_resource_busy,
+                                     "Failed to create shared event.");
+    return std::make_shared(Event, Name);
+  }
+
   llvm::Expected>
   createBuffer(std::string Name, BufferCreateDesc &Desc,
                size_t SizeInBytes) override {
@@ -588,6 +630,11 @@ class MTLDevice : public offloadtest::Device {
   llvm::Error executeProgram(Pipeline &P) override {
     InvocationState IS;

+    auto FenceOrErr = this->createFence("Fence");
+    if (!FenceOrErr)
+      return FenceOrErr.takeError();
+    IS.Fence = *FenceOrErr;
+
     if (auto Err = createBuffers(P, IS))
       return Err;
