diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 81df567bb..c7e6ddf2a 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -24,6 +25,26 @@ using namespace offloadtest; +// We use 64KB tile size because DX has a fixed tile size, and the offload test +// suite must work for all APIs. +constexpr uint32_t SparseBufferTileSize = 65536; + +static constexpr uint32_t InvalidQueueIndex = + std::numeric_limits::max(); + +static uint32_t findQueue(const VkQueueFamilyProperties *Props, uint32_t Count, + VkQueueFlags Flags) { + for (uint32_t I = 0; I < Count; ++I) { + if ((Props[I].queueFlags & Flags) == Flags) + return I; + } + return InvalidQueueIndex; +} + +static uint32_t getNumTiles(size_t Size) { + return (Size + SparseBufferTileSize - 1) / SparseBufferTileSize; +} + #define VKFormats(FMT, BITS) \ if (Channels == 1) \ return VK_FORMAT_R##BITS##_##FMT; \ @@ -65,6 +86,7 @@ static VkFormat getVKFormat(DataFormat Format, int Channels) { static VkDescriptorType getDescriptorType(const ResourceKind RK) { switch (RK) { case ResourceKind::Buffer: + return VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; case ResourceKind::RWBuffer: return VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; case ResourceKind::Texture2D: @@ -461,6 +483,7 @@ class VulkanDevice : public offloadtest::Device { VkPhysicalDeviceDriverProperties DriverProps; VkDevice Device = VK_NULL_HANDLE; VulkanQueue GraphicsQueue; + VulkanQueue SparseQueue; Capabilities Caps; using LayerVector = llvm::SmallVector; LayerVector InstanceLayers; @@ -588,33 +611,48 @@ class VulkanDevice : public offloadtest::Device { vkGetPhysicalDeviceQueueFamilyProperties(PhysicalDevice, &QueueCount, QueueFamilyProps.get()); - std::optional SelectedIdx; - for (uint32_t I = 0; I < QueueCount; ++I) { - const VkQueueFlags Flags = QueueFamilyProps[I].queueFlags; - // Prefer family supporting both GRAPHICS and COMPUTE - if ((Flags & VK_QUEUE_GRAPHICS_BIT) && 
(Flags & VK_QUEUE_COMPUTE_BIT)) { - SelectedIdx = static_cast(I); - break; - } + uint32_t MainQueueIdx = + findQueue(QueueFamilyProps.get(), QueueCount, + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_SPARSE_BINDING_BIT); + uint32_t SparseQueueIdx = MainQueueIdx; + + // If not found, find separate queues + if (MainQueueIdx == InvalidQueueIndex) { + MainQueueIdx = findQueue(QueueFamilyProps.get(), QueueCount, + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT); + SparseQueueIdx = findQueue(QueueFamilyProps.get(), QueueCount, + VK_QUEUE_SPARSE_BINDING_BIT); } - if (!SelectedIdx) - return llvm::createStringError(std::errc::no_such_device, - "No suitable queue family found."); - - const uint32_t QueueFamilyIdx = *SelectedIdx; + if (MainQueueIdx == InvalidQueueIndex) + return llvm::createStringError( + std::errc::no_such_device, + "No suitable queue family found for graphics and compute."); - VkDeviceQueueCreateInfo QueueInfo = {}; const float QueuePriority = 1.0f; - QueueInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; - QueueInfo.queueFamilyIndex = QueueFamilyIdx; - QueueInfo.queueCount = 1; - QueueInfo.pQueuePriorities = &QueuePriority; + std::vector QueueCreateInfos; + + auto AddQueueCreateInfo = [&](uint32_t QFamilyIndex) { + if (QFamilyIndex == InvalidQueueIndex) + return; + VkDeviceQueueCreateInfo QueueCreateInfo = {}; + QueueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + QueueCreateInfo.queueFamilyIndex = QFamilyIndex; + QueueCreateInfo.queueCount = 1; + QueueCreateInfo.pQueuePriorities = &QueuePriority; + QueueCreateInfos.push_back(QueueCreateInfo); + }; + + AddQueueCreateInfo(MainQueueIdx); + if (MainQueueIdx != SparseQueueIdx) + AddQueueCreateInfo(SparseQueueIdx); VkDeviceCreateInfo DeviceInfo = {}; DeviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; - DeviceInfo.queueCreateInfoCount = 1; - DeviceInfo.pQueueCreateInfos = &QueueInfo; + DeviceInfo.queueCreateInfoCount = + static_cast(QueueCreateInfos.size()); + 
DeviceInfo.pQueueCreateInfos = QueueCreateInfos.data(); VkPhysicalDeviceFeatures2 Features{}; Features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; @@ -647,21 +685,27 @@ class VulkanDevice : public offloadtest::Device { if (vkCreateDevice(PhysicalDevice, &DeviceInfo, nullptr, &Device)) return llvm::createStringError(std::errc::no_such_device, "Could not create Vulkan logical device."); - VkQueue DeviceQueue = VK_NULL_HANDLE; - vkGetDeviceQueue(Device, QueueFamilyIdx, 0, &DeviceQueue); - - const VulkanQueue GraphicsQueue = VulkanQueue(DeviceQueue, QueueFamilyIdx); - - return std::make_unique(Instance, PhysicalDevice, Props, - Device, std::move(GraphicsQueue), - std::move(InstanceLayers)); + VkQueue MainQueue = VK_NULL_HANDLE; + vkGetDeviceQueue(Device, MainQueueIdx, 0, &MainQueue); + VkQueue SparseQueue = VK_NULL_HANDLE; + if (SparseQueueIdx != InvalidQueueIndex) + vkGetDeviceQueue(Device, SparseQueueIdx, 0, &SparseQueue); + + VulkanQueue GraphicsQueue = VulkanQueue(MainQueue, MainQueueIdx); + VulkanQueue VulkanSparseQueue = VulkanQueue(SparseQueue, SparseQueueIdx); + + return std::make_unique( + Instance, PhysicalDevice, Props, Device, std::move(GraphicsQueue), + std::move(VulkanSparseQueue), std::move(InstanceLayers)); } VulkanDevice(std::shared_ptr I, VkPhysicalDevice P, VkPhysicalDeviceProperties Props, VkDevice D, VulkanQueue Q, + VulkanQueue SQ, llvm::SmallVector InstanceLayers) : Instance(I), PhysicalDevice(P), Props(Props), Device(D), - GraphicsQueue(std::move(Q)), InstanceLayers(std::move(InstanceLayers)) { + GraphicsQueue(std::move(Q)), SparseQueue(SQ), + InstanceLayers(std::move(InstanceLayers)) { const uint64_t DeviceNameSz = strnlen(Props.deviceName, VK_MAX_PHYSICAL_DEVICE_NAME_SIZE); Description = std::string(Props.deviceName, DeviceNameSz); @@ -829,6 +873,11 @@ class VulkanDevice : public offloadtest::Device { #define VULKAN_FLOAT_CONTROLS_FEATURE_BOOL(Name) \ Caps.insert(std::make_pair( \ #Name, makeCapability(#Name, 
FloatControlProp.Name))); + +#define VULKAN_SPARSE_PROPERTY_BOOL(Name) \ + Caps.insert(std::make_pair( \ + #Name, makeCapability(#Name, Props.sparseProperties.Name))); + #define VULKAN_FEATURE_BOOL(Name) \ Caps.insert(std::make_pair( \ #Name, makeCapability(#Name, Features.features.Name))); @@ -849,7 +898,6 @@ class VulkanDevice : public offloadtest::Device { #include "VKFeatures.def" } -public: llvm::Error createDevice(InvocationState &IS) { VkCommandPoolCreateInfo CmdPoolInfo = {}; CmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; @@ -881,65 +929,79 @@ class VulkanDevice : public offloadtest::Device { llvm::Expected createBuffer(VkBufferUsageFlags Usage, VkMemoryPropertyFlags MemoryFlags, - size_t Size, void *Data = nullptr) { - VkBuffer Buffer; - VkDeviceMemory Memory; - VkBufferCreateInfo BufferInfo = {}; - BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - BufferInfo.size = Size; - BufferInfo.usage = Usage; - BufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - - if (vkCreateBuffer(Device, &BufferInfo, nullptr, &Buffer)) - return llvm::createStringError(std::errc::not_enough_memory, - "Could not create buffer."); - + size_t Size, void *Data = nullptr, + VkBufferCreateFlags Flags = 0) { + auto ExBuffer = createVkBuffer(Device, Size, Usage, Flags); + if (!ExBuffer) + return ExBuffer.takeError(); + VkBuffer Buffer = *ExBuffer; VkMemoryRequirements MemReqs; vkGetBufferMemoryRequirements(Device, Buffer, &MemReqs); - VkMemoryAllocateInfo AllocInfo = {}; - AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - AllocInfo.allocationSize = MemReqs.size; - llvm::Expected MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, MemoryFlags); if (!MemIdx) return MemIdx.takeError(); - - AllocInfo.memoryTypeIndex = *MemIdx; - - if (vkAllocateMemory(Device, &AllocInfo, nullptr, &Memory)) - return llvm::createStringError(std::errc::not_enough_memory, - "Memory allocation failed."); + auto ExMemory = allocateMemory(Device, MemReqs.size, 
*MemIdx); + if (!ExMemory) + return ExMemory.takeError(); + VkDeviceMemory Memory = *ExMemory; if (Data) { void *Dst = nullptr; if (vkMapMemory(Device, Memory, 0, VK_WHOLE_SIZE, 0, &Dst)) return llvm::createStringError(std::errc::not_enough_memory, "Failed to map memory."); memcpy(Dst, Data, Size); - VkMappedMemoryRange Range = {}; Range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; Range.memory = Memory; Range.offset = 0; Range.size = VK_WHOLE_SIZE; vkFlushMappedMemoryRanges(Device, 1, &Range); - vkUnmapMemory(Device, Memory); } - if (vkBindBufferMemory(Device, Buffer, Memory, 0)) return llvm::createStringError(std::errc::not_enough_memory, "Failed to bind buffer to memory."); - return BufferRef{Buffer, Memory}; } + llvm::Expected createVkBuffer(VkDevice Device, size_t Size, + VkBufferUsageFlags Usage, + VkBufferCreateFlags Flags = 0) { + VkBuffer Buffer; + VkBufferCreateInfo BufferInfo = {}; + BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + BufferInfo.size = Size; + BufferInfo.usage = Usage; + BufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + BufferInfo.flags = Flags; + + if (vkCreateBuffer(Device, &BufferInfo, nullptr, &Buffer)) + return llvm::createStringError(std::errc::not_enough_memory, + "Could not create buffer."); + return Buffer; + } + + llvm::Expected + allocateMemory(VkDevice Device, VkDeviceSize Size, uint32_t MemoryTypeIndex) { + VkMemoryAllocateInfo AllocInfo = {}; + AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + AllocInfo.allocationSize = Size; + AllocInfo.memoryTypeIndex = MemoryTypeIndex; + VkDeviceMemory Memory; + if (vkAllocateMemory(Device, &AllocInfo, nullptr, &Memory)) + return llvm::createStringError(std::errc::not_enough_memory, + "Memory allocation failed."); + return Memory; + } + llvm::Expected createImage(Resource &R, BufferRef &Host, int UsageOverride = 0) { const offloadtest::CPUBuffer &B = *R.BufferPtr; if (B.Format == DataFormat::Depth32 && R.isReadWrite()) - return 
llvm::createStringError(std::errc::invalid_argument, - "Image memory allocation failed."); + return llvm::createStringError( + std::errc::invalid_argument, + "Depth32 format is not supported for read-write resources."); VkImageCreateInfo ImageCreateInfo = {}; ImageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; ImageCreateInfo.imageType = getVKImageType(R.Kind); @@ -1016,6 +1078,19 @@ class VulkanDevice : public offloadtest::Device { return ResourceRef(Host, ImageRef{0, Sampler, 0}); } + VkDeviceSize getCopySize(Resource &R) { + VkDeviceSize CopySize = R.size(); + if (R.IsReserved) { + const VkDeviceSize MappedSize = + static_cast( + R.TilesMapped.value_or(getNumTiles(R.size()))) * + SparseBufferTileSize; + if (CopySize > MappedSize) + CopySize = MappedSize; + } + return CopySize; + } + llvm::Error createResource(Resource &R, InvocationState &IS) { // Samplers don't have backing data buffers, so handle them separately if (R.isSampler()) { @@ -1029,6 +1104,11 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } + if (!R.BufferPtr) + return llvm::createStringError(std::errc::invalid_argument, + "Resource '%s' has no backing buffer.", + R.Name.c_str()); + ResourceBundle Bundle{getDescriptorType(R.Kind), R.size(), R.BufferPtr}; for (auto &ResData : R.BufferPtr->Data) { auto ExHostBuf = createBuffer( @@ -1054,14 +1134,22 @@ class VulkanDevice : public offloadtest::Device { Bundle.ResourceRefs.push_back(*ExImageRef); } else { - auto ExDeviceBuf = createBuffer( - getFlagBits(R.Kind) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | - VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size()); + llvm::Expected ExDeviceBuf = + R.IsReserved + ? 
createSparseBuffer( + getFlagBits(R.Kind) | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size(), + R.TilesMapped.value_or(getNumTiles(R.size()))) + : createBuffer(getFlagBits(R.Kind) | + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, R.size()); if (!ExDeviceBuf) return ExDeviceBuf.takeError(); + VkBufferCopy Copy = {}; - Copy.size = R.size(); + Copy.size = getCopySize(R); vkCmdCopyBuffer(IS.CmdBuffer, ExHostBuf->Buffer, ExDeviceBuf->Buffer, 1, &Copy); Bundle.ResourceRefs.emplace_back(*ExHostBuf, *ExDeviceBuf); @@ -1094,6 +1182,106 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } + llvm::Expected + createSparseBuffer(VkBufferUsageFlags Usage, + VkMemoryPropertyFlags MemoryFlags, size_t Size, + uint32_t TilesMapped) { + if (SparseQueue.Queue == VK_NULL_HANDLE) + return llvm::createStringError( + std::errc::not_supported, + "Sparse binding is not supported by the Vulkan device."); + + auto ExBuffer = createVkBuffer(Device, Size, Usage, + VK_BUFFER_CREATE_SPARSE_BINDING_BIT | + VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT); + if (!ExBuffer) + return ExBuffer.takeError(); + VkBuffer Buffer = *ExBuffer; + VkDeviceMemory Memory = VK_NULL_HANDLE; + bool Success = false; + auto Cleanup = llvm::scope_exit([&]() { + if (!Success) { + if (Memory != VK_NULL_HANDLE) + vkFreeMemory(Device, Memory, nullptr); + vkDestroyBuffer(Device, Buffer, nullptr); + } + }); + + VkMemoryRequirements MemReqs; + vkGetBufferMemoryRequirements(Device, Buffer, &MemReqs); + + if (SparseBufferTileSize % MemReqs.alignment != 0) + return llvm::createStringError( + std::errc::not_supported, + "Sparse buffer alignment must be a factor of 64KB."); + + if (TilesMapped == 0) + return llvm::createStringError(std::errc::invalid_argument, + "TilesMapped must be at least 1."); + + // Calculate size for the mapped region (TilesMapped * 
SparseBufferTileSize) + VkDeviceSize MappedSize = static_cast<VkDeviceSize>(TilesMapped) * SparseBufferTileSize; + + if (MappedSize > MemReqs.size) + MappedSize = MemReqs.size; + + llvm::Expected MemIdx = + getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, MemoryFlags); + if (!MemIdx) + return MemIdx.takeError(); + + auto ExMemory = allocateMemory(Device, MappedSize, *MemIdx); + if (!ExMemory) + return ExMemory.takeError(); + Memory = *ExMemory; + + // Bind the allocated memory to the start of the buffer + VkSparseMemoryBind Bind = {}; + Bind.resourceOffset = 0; + Bind.size = MappedSize; + Bind.memory = Memory; + Bind.memoryOffset = 0; + Bind.flags = 0; + + VkSparseBufferMemoryBindInfo BufferBindInfo = {}; + BufferBindInfo.buffer = Buffer; + BufferBindInfo.bindCount = 1; + BufferBindInfo.pBinds = &Bind; + + VkBindSparseInfo BindInfo = {}; + BindInfo.sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO; + BindInfo.bufferBindCount = 1; + BindInfo.pBufferBinds = &BufferBindInfo; + + // Signal a fence from the sparse bind and block on it below, so that the + // memory binding has completed before this function returns the buffer + // to the caller for use in recorded commands. Note: + // vkQueueBindSparse requires the queue to support SPARSE_BINDING. 
+ VkFence Fence; + VkFenceCreateInfo FenceInfo = {}; + FenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + if (vkCreateFence(Device, &FenceInfo, nullptr, &Fence)) + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to create fence for sparse bind"); + + auto CleanFence = + llvm::scope_exit([&]() { vkDestroyFence(Device, Fence, nullptr); }); + + if (vkQueueBindSparse(SparseQueue.Queue, 1, &BindInfo, Fence) != + VK_SUCCESS) { + return llvm::createStringError(std::errc::io_error, + "vkQueueBindSparse failed"); + } + + if (vkWaitForFences(Device, 1, &Fence, VK_TRUE, UINT64_MAX) != VK_SUCCESS) { + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to wait for sparse bind fence"); + } + + Success = true; + return BufferRef{Buffer, Memory}; + } + llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { // Create an optimal image used as the depth stencil attachment VkImageCreateInfo ImageCi = {}; @@ -2386,6 +2574,7 @@ class VulkanDevice : public offloadtest::Device { vkDestroyCommandPool(Device, IS.CmdPool, nullptr); } +public: llvm::Error executeProgram(Pipeline &P) override { InvocationState State; auto CleanupState = llvm::scope_exit([&]() { diff --git a/lib/API/VK/VKFeatures.def b/lib/API/VK/VKFeatures.def index 7c5f2c427..12a78923d 100644 --- a/lib/API/VK/VKFeatures.def +++ b/lib/API/VK/VKFeatures.def @@ -58,6 +58,15 @@ VULKAN_FEATURE_BOOL(inheritedQueries) #undef VULKAN_FEATURE_BOOL #endif +#ifdef VULKAN_SPARSE_PROPERTY_BOOL +VULKAN_SPARSE_PROPERTY_BOOL(residencyStandard2DBlockShape) +VULKAN_SPARSE_PROPERTY_BOOL(residencyStandard2DMultisampleBlockShape) +VULKAN_SPARSE_PROPERTY_BOOL(residencyStandard3DBlockShape) +VULKAN_SPARSE_PROPERTY_BOOL(residencyAlignedMipSize) +VULKAN_SPARSE_PROPERTY_BOOL(residencyNonResidentStrict) +#undef VULKAN_SPARSE_PROPERTY_BOOL +#endif + #ifdef VULKAN_FLOAT_CONTROLS_FEATURE_BOOL VULKAN_FLOAT_CONTROLS_FEATURE_BOOL(shaderSignedZeroInfNanPreserveFloat16) 
VULKAN_FLOAT_CONTROLS_FEATURE_BOOL(shaderSignedZeroInfNanPreserveFloat32) diff --git a/test/Feature/Sparse/SparseByteAddressBuffer.test b/test/Feature/Sparse/SparseByteAddressBuffer.test new file mode 100644 index 000000000..cb033936e --- /dev/null +++ b/test/Feature/Sparse/SparseByteAddressBuffer.test @@ -0,0 +1,79 @@ +#--- source.hlsl +RWByteAddressBuffer X : register(u0); +RWStructuredBuffer Out : register(u1); + +[numthreads(1,1,1)] +void main() { + // Offset 0: first 64KB tile, which is mapped. + Out[0] = X.Load(0); + + // Offset 128000: second 64KB tile, unmapped (TilesMapped: 1), so it reads 0. + Out[1] = X.Load(128000); +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Stride: 4 + FillSize: 131072 + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 + FillValue: 0 + - Name: ExpectedOut + Format: Int32 + Stride: 4 + Data: [9001, 0] + +Results: + - Result: Test + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + +DescriptorSets: + - Resources: + - Name: X + Kind: RWByteAddressBuffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer +# REQUIRES: !Vulkan || residencyNonResidentStrict + +# XFAIL: Vulkan && Clang + +# Unimplemented: https://github.com/llvm/offload-test-suite/issues/515 +# XFAIL: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# CHECK: - Name: Out +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 9001, 0 ] diff --git a/test/Feature/Sparse/SparseRWStructuredBuffer.test b/test/Feature/Sparse/SparseRWStructuredBuffer.test new file mode 100644 index 000000000..9bf3be10b --- /dev/null +++ b/test/Feature/Sparse/SparseRWStructuredBuffer.test @@ -0,0 +1,79 @@ +#--- 
source.hlsl +RWStructuredBuffer X : register(u0); +RWStructuredBuffer Out : register(u1); + +[numthreads(1,1,1)] +void main() { + // Index 0: Guaranteed to be in the first mapped tile. + Out[0] = X[0]; + + // Index 32000: 32000 * 4 bytes = 128,000 bytes. + // If tile size is 64KB (65536 bytes), this is in the second tile (offset 65536+). + // This should return 0 because it is unmapped. + Out[1] = X[32000]; +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Stride: 4 + FillSize: 131072 # 128KB total bytes (32768 elements) + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 + FillValue: 0 + - Name: ExpectedOut + Format: Int32 + Stride: 4 + Data: [9001, 0] + +Results: + - Result: Test + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + +DescriptorSets: + - Resources: + - Name: X + Kind: RWStructuredBuffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer +# REQUIRES: !Vulkan || residencyNonResidentStrict + +# Unimplemented: https://github.com/llvm/offload-test-suite/issues/515 +# XFAIL: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# CHECK: - Name: Out +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 9001, 0 ] diff --git a/test/Feature/Sparse/SparseStructuredBuffer.test b/test/Feature/Sparse/SparseStructuredBuffer.test new file mode 100644 index 000000000..be88ce5b5 --- /dev/null +++ b/test/Feature/Sparse/SparseStructuredBuffer.test @@ -0,0 +1,86 @@ +#--- source.hlsl +RWBuffer X : register(u0); +RWStructuredBuffer Out : register(u1); + +[numthreads(1,1,1)] 
+void main() { + // Index 0: Guaranteed to be in the first mapped tile. + Out[0] = X[0]; + + // Index 32000: 32000 * 4 bytes = 128,000 bytes. + // If tile size is 64KB (65536 bytes), this is in the second tile (offset 65536+). + // This should return 0 since it is unmapped. + Out[1] = X[32000]; +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Stride: 4 + FillSize: 131072 # 128KB total bytes (32768 elements) + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 + FillValue: 0 + - Name: ExpectedOut + Format: Int32 + Stride: 4 + Data: [9001, 0] + +Results: + - Result: Test + Rule: BufferExact + Actual: Out + Expected: ExpectedOut + +DescriptorSets: + - Resources: + - Name: X + Kind: RWBuffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer +# REQUIRES: !Vulkan || residencyNonResidentStrict + +# Unimplemented: https://github.com/llvm/offload-test-suite/issues/515 +# XFAIL: Metal + +# RUN: split-file %s %t + +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl + +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + + + +# CHECK: - Name: Out + +# CHECK-NEXT: Format: Int32 + +# CHECK-NEXT: Stride: 4 + +# CHECK-NEXT: Data: [ 9001, 0 ] diff --git a/test/Feature/Sparse/SparseTypedLoad.test b/test/Feature/Sparse/SparseTypedLoad.test new file mode 100644 index 000000000..09669dccc --- /dev/null +++ b/test/Feature/Sparse/SparseTypedLoad.test @@ -0,0 +1,73 @@ +#--- source.hlsl +Buffer X : register(t0); + +RWStructuredBuffer Out : register(u1); + +[numthreads(1,1,1)] +void main() { + int4 Result; + + // Index 0: Tile 0 of X. Mapped. 
+ Result = X.Load(0); + Out[0] = Result.x; + + // Index 5000: Tile 1 of X. Unmapped. + Result = X.Load(5000); + Out[1] = Result.x; +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Channels: 4 + FillSize: 131072 # 128KB + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 +DescriptorSets: + - Resources: + - Name: X + Kind: Buffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer +# REQUIRES: !Vulkan || residencyNonResidentStrict + +# Unimplemented: https://github.com/llvm/offload-test-suite/issues/515 +# XFAIL: Metal + +# Bug: https://github.com/llvm/llvm-project/issues/191248 +# XFAIL: DirectX && Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# CHECK: - Name: Out +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 9001, 0 ] diff --git a/test/Feature/Sparse/SparseTypedLoadMapped.test b/test/Feature/Sparse/SparseTypedLoadMapped.test new file mode 100644 index 000000000..6c23abbaa --- /dev/null +++ b/test/Feature/Sparse/SparseTypedLoadMapped.test @@ -0,0 +1,64 @@ +#--- source.hlsl +Buffer X : register(t0); +RWStructuredBuffer Out : register(u1); + +[numthreads(1,1,1)] +void main() { + // Index 0: Tile 0. Mapped. + Out[0] = X.Load(0).x; + // Index 5000: Tile 1 (offset 80000). Mapped (TilesMapped: 2 = 128KB). 
+ Out[1] = X.Load(5000).x; +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Channels: 4 + FillSize: 131072 + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 +DescriptorSets: + - Resources: + - Name: X + Kind: Buffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 2 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer + +# Bug: https://github.com/llvm/llvm-project/issues/191248 +# XFAIL: DirectX && Clang +# XFAIL: Metal && Clang + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# CHECK: - Name: Out +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 9001, 9001 ] diff --git a/test/Feature/Sparse/SparseTypedResidency.test b/test/Feature/Sparse/SparseTypedResidency.test new file mode 100644 index 000000000..1ee16ade8 --- /dev/null +++ b/test/Feature/Sparse/SparseTypedResidency.test @@ -0,0 +1,94 @@ +#--- source.hlsl +Buffer X : register(t0); + +RWStructuredBuffer Out : register(u1); +RWStructuredBuffer CAFM : register(u2); + +[numthreads(1,1,1)] +void main() { + uint status; + int4 Result; + + // Index 0: Tile 0 of X. Mapped. + Result = X.Load(0, status); + CAFM[0] = CheckAccessFullyMapped(status); + Out[0] = CAFM[0] ? Result.x : 9003; + + // Index 5000: Tile 1 of X. Unmapped. + Result = X.Load(5000, status); + CAFM[1] = CheckAccessFullyMapped(status); + Out[1] = CAFM[1] ? 
Result.x : 9003; +} + +//--- pipeline.yaml +--- + +Shaders: + - Stage: Compute + Entry: main + DispatchSize: [1, 1, 1] +Buffers: + - Name: X + Format: Int32 + Channels: 4 + FillSize: 131072 # 128KB + FillValue: 9001 + - Name: Out + Format: Int32 + Stride: 4 + FillSize: 8 + - Name: CAFM + Format: Int32 + Stride: 4 + FillSize: 8 +DescriptorSets: + - Resources: + - Name: X + Kind: Buffer + IsReserved: true + DirectXBinding: + Register: 0 + Space: 0 + VulkanBinding: + Binding: 0 + TilesMapped: 1 + - Name: Out + Kind: RWStructuredBuffer + DirectXBinding: + Register: 1 + Space: 0 + VulkanBinding: + Binding: 1 + - Name: CAFM + Kind: RWStructuredBuffer + DirectXBinding: + Register: 2 + Space: 0 + VulkanBinding: + Binding: 2 +#--- end + +# REQUIRES: !Vulkan || sparseBinding +# REQUIRES: !Vulkan || sparseResidencyBuffer +# REQUIRES: !Vulkan || shaderResourceResidency +# REQUIRES: !Vulkan || residencyNonResidentStrict + +# Unimplemented: https://github.com/llvm/llvm-project/issues/166954 +# XFAIL: Vulkan && Clang + +# Unimplemented: https://github.com/llvm/offload-test-suite/issues/515 +# XFAIL: Metal + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s + +# CHECK: - Name: Out +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 9001, 9003 ] + +# CHECK: - Name: CAFM +# CHECK-NEXT: Format: Int32 +# CHECK-NEXT: Stride: 4 +# CHECK-NEXT: Data: [ 1, 0 ] diff --git a/test/lit.cfg.py b/test/lit.cfg.py index 35e8f2ba5..958c51993 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -165,6 +165,11 @@ def setDeviceFeatures(config, device, compiler): if device["Features"].get("shaderInt64", False): config.available_features.add("Int64") + # Add all boolean features from api-query + for FeatureName, FeatureValue in device["Features"].items(): + if isinstance(FeatureValue, bool) and FeatureValue: + config.available_features.add(FeatureName) + # Add supported extensions. 
for Extension in device["Extensions"]: config.available_features.add(Extension["ExtensionName"])