diff --git a/include/API/Buffer.h b/include/API/Buffer.h new file mode 100644 index 000000000..b150682fe --- /dev/null +++ b/include/API/Buffer.h @@ -0,0 +1,36 @@ +//===- Buffer.h - Offload API Buffer --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_BUFFER_H +#define OFFLOADTEST_API_BUFFER_H + +#include "API/Resources.h" + +namespace offloadtest { + +struct BufferCreateDesc { + MemoryLocation Location; +}; + +class Buffer { +public: + virtual ~Buffer() = default; + + Buffer(const Buffer &) = delete; + Buffer &operator=(const Buffer &) = delete; + +protected: + Buffer() = default; +}; + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_BUFFER_H diff --git a/include/API/Device.h b/include/API/Device.h index f99271233..0f21b703a 100644 --- a/include/API/Device.h +++ b/include/API/Device.h @@ -15,7 +15,10 @@ #include "Config.h" #include "API/API.h" +#include "API/Buffer.h" #include "API/Capabilities.h" +#include "API/Texture.h" +#include "Support/Pipeline.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" @@ -35,27 +38,6 @@ struct DeviceConfig { bool EnableValidationLayer = false; }; -enum class MemoryLocation { - GpuOnly, - CpuToGpu, - GpuToCpu, -}; - -struct BufferCreateDesc { - MemoryLocation Location; -}; - -class Buffer { -public: - virtual ~Buffer() = default; - - Buffer(const Buffer &) = delete; - Buffer &operator=(const Buffer &) = delete; - -protected: - Buffer() = default; -}; - class Queue { public: virtual ~Queue() = 0; @@ -80,6 +62,10 @@ class Device { virtual llvm::Expected> createBuffer(std::string Name, BufferCreateDesc &Desc, 
size_t SizeInBytes) = 0; + + virtual llvm::Expected> + createTexture(std::string Name, TextureCreateDesc &Desc) = 0; + virtual void printExtra(llvm::raw_ostream &OS) {} virtual ~Device() = 0; @@ -100,6 +86,16 @@ initializeMetalDevices(const DeviceConfig Config, llvm::Expected>> initializeDevices(const DeviceConfig Config); +// Creates a render target texture using the format and dimensions from a +// CPUBuffer. Does not upload the buffer's data — only uses its description to +// configure the texture. +llvm::Expected> +createRenderTargetFromCPUBuffer(Device &Dev, const CPUBuffer &Buf); + +// Creates a depth/stencil texture matching the dimensions of a render target. +llvm::Expected> +createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, uint32_t Height); + } // namespace offloadtest #endif // OFFLOADTEST_API_DEVICE_H diff --git a/include/API/FormatConversion.h b/include/API/FormatConversion.h new file mode 100644 index 000000000..277206ab9 --- /dev/null +++ b/include/API/FormatConversion.h @@ -0,0 +1,159 @@ +//===- FormatConversion.h - Bridge between DataFormat and Format -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Transitional helpers for converting between the legacy DataFormat + Channels +// description system and the unified Format enum. This file should be deleted +// once the pipeline is fully migrated to use Format directly. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOADTEST_API_FORMATCONVERSION_H
+#define OFFLOADTEST_API_FORMATCONVERSION_H
+
+#include "API/Resources.h"
+#include "API/Texture.h"
+#include "Support/Pipeline.h"
+
+#include "llvm/Support/Error.h"
+
+namespace offloadtest {
+
+// Bridge for code that still describes textures as DataFormat + Channels (e.g.
+// render targets bound via CPUBuffer). Once the pipeline is refactored to use
+// Format directly, this function can be removed.
+//
+// Only the 1-, 2- and 4-channel layouts of Int16, UInt16, Int32, UInt32 and
+// Float32 are mapped, plus single-channel Depth32. Every other combination
+// returns an invalid_argument error naming the DataFormat (and channel count).
+inline llvm::Expected<Format> toFormat(DataFormat Format, int Channels) {
+  switch (Format) {
+  case DataFormat::Int16:
+    switch (Channels) {
+    case 1:
+      return Format::R16Sint;
+    case 2:
+      return Format::RG16Sint;
+    case 4:
+      return Format::RGBA16Sint;
+    }
+    break;
+  case DataFormat::UInt16:
+    switch (Channels) {
+    case 1:
+      return Format::R16Uint;
+    case 2:
+      return Format::RG16Uint;
+    case 4:
+      return Format::RGBA16Uint;
+    }
+    break;
+  case DataFormat::Int32:
+    switch (Channels) {
+    case 1:
+      return Format::R32Sint;
+    case 2:
+      return Format::RG32Sint;
+    case 4:
+      return Format::RGBA32Sint;
+    }
+    break;
+  case DataFormat::UInt32:
+    switch (Channels) {
+    case 1:
+      return Format::R32Uint;
+    case 2:
+      return Format::RG32Uint;
+    case 4:
+      return Format::RGBA32Uint;
+    }
+    break;
+  case DataFormat::Float32:
+    switch (Channels) {
+    case 1:
+      return Format::R32Float;
+    case 2:
+      return Format::RG32Float;
+    case 4:
+      return Format::RGBA32Float;
+    }
+    break;
+  case DataFormat::Depth32:
+    // D32FloatS8Uint is not expressible as DataFormat + Channels because the
+    // stencil component is uint8, not a second Depth32 channel. Once the
+    // pipeline uses Format directly, this limitation goes away.
+    if (Channels == 1)
+      return Format::D32Float;
+    break;
+  // No Format mapping for these DataFormats.
+  case DataFormat::Hex8:
+  case DataFormat::Hex16:
+  case DataFormat::Hex32:
+  case DataFormat::Hex64:
+  case DataFormat::UInt64:
+  case DataFormat::Int64:
+  case DataFormat::Float16:
+  case DataFormat::Float64:
+  case DataFormat::Bool:
+    return llvm::createStringError(std::errc::invalid_argument,
+                                   "DataFormat %d has no Format equivalent.",
+                                   static_cast<int>(Format));
+  }
+  // Reached when the DataFormat is mappable but the channel count is not.
+  return llvm::createStringError(std::errc::invalid_argument,
+                                 "No Format for DataFormat %d with %d "
+                                 "channel(s).",
+                                 static_cast<int>(Format), Channels);
+}
+
+// Validates that a TextureCreateDesc is consistent with the CPUBuffer it was
+// derived from. Call this after building a TextureCreateDesc from a CPUBuffer
+// to catch mismatches between the two description systems.
+//
+// Checks, in order: format, width, height, mip levels, stride (only when
+// Buf.Stride > 0) and total byte size (width * height * texel size). The first
+// mismatch is returned as an invalid_argument error; a failure of toFormat on
+// the buffer's own description is forwarded unchanged.
+inline llvm::Error
+validateTextureDescMatchesCPUBuffer(const TextureCreateDesc &Desc,
+                                    const CPUBuffer &Buf) {
+  auto ExpectedFmt = toFormat(Buf.Format, Buf.Channels);
+  if (!ExpectedFmt)
+    return ExpectedFmt.takeError();
+  if (Desc.Format != *ExpectedFmt)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "TextureCreateDesc format '%s' does not match CPUBuffer format "
+        "(DataFormat %d, %d channels -> '%s').",
+        getFormatName(Desc.Format).data(), static_cast<int>(Buf.Format),
+        Buf.Channels, getFormatName(*ExpectedFmt).data());
+  if (Desc.Width != static_cast<uint32_t>(Buf.OutputProps.Width))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "TextureCreateDesc width %u does not match CPUBuffer width %d.",
+        Desc.Width, Buf.OutputProps.Width);
+  if (Desc.Height != static_cast<uint32_t>(Buf.OutputProps.Height))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "TextureCreateDesc height %u does not match CPUBuffer height %d.",
+        Desc.Height, Buf.OutputProps.Height);
+  if (Desc.MipLevels != static_cast<uint32_t>(Buf.OutputProps.MipLevels))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "TextureCreateDesc mip levels %u does not match CPUBuffer mip "
+        "levels %d.",
+        Desc.MipLevels, Buf.OutputProps.MipLevels);
+  const uint32_t TexelSize = getFormatSize(Desc.Format);
+  if (Buf.Stride > 0 && static_cast<uint32_t>(Buf.Stride) != TexelSize)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "CPUBuffer stride %d does not match texture format element size %u.",
+        Buf.Stride, TexelSize);
+  // Widen before multiplying so large textures cannot overflow 32 bits.
+  const uint64_t ExpectedSize =
+      static_cast<uint64_t>(Desc.Width) * Desc.Height * TexelSize;
+  if (static_cast<uint64_t>(Buf.size()) != ExpectedSize)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "CPUBuffer size %u does not match expected size %llu "
+        "(width %u * height %u * element size %u).",
+        // uint64_t is not 'unsigned long long' on every platform; cast so the
+        // argument type matches the %llu conversion exactly.
+        Buf.size(), static_cast<unsigned long long>(ExpectedSize), Desc.Width,
+        Desc.Height, TexelSize);
+  return llvm::Error::success();
+}
+
+} // namespace offloadtest
+
+#endif // OFFLOADTEST_API_FORMATCONVERSION_H
diff --git a/include/API/Resources.h b/include/API/Resources.h
new file mode 100644
index 000000000..13593d350
--- /dev/null
+++ b/include/API/Resources.h
@@ -0,0 +1,237 @@
+//===- Resources.h - Offload API shared resource types --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOADTEST_API_RESOURCES_H
+#define OFFLOADTEST_API_RESOURCES_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include <cstdint>
+
+namespace offloadtest {
+
+enum class MemoryLocation {
+  GpuOnly,
+  CpuToGpu,
+  GpuToCpu,
+};
+
+// TODO: Add Unorm types (e.g. R8Unorm, RGBA8Unorm) which can be sampled as
+// floats.
+// TODO: Add SRGB types (e.g. RGBA8Srgb) once needed.
+enum class Format {
+  // 16-bit integer channels.
+  R16Sint,
+  R16Uint,
+  RG16Sint,
+  RG16Uint,
+  RGBA16Sint,
+  RGBA16Uint,
+  // 32-bit integer and float channels.
+  R32Sint,
+  R32Uint,
+  R32Float,
+  RG32Sint,
+  RG32Uint,
+  RG32Float,
+  RGB32Float,
+  RGBA32Sint,
+  RGBA32Uint,
+  RGBA32Float,
+  // Depth and depth/stencil.
+  D32Float,
+  D32FloatS8Uint,
+};
+
+// Returns the enumerator's own spelling, e.g. Format::RG32Float -> "RG32Float".
+// The switch deliberately has no default so that adding an enumerator without
+// a name is reported by the compiler's switch-coverage warning.
+inline llvm::StringRef getFormatName(Format Format) {
+  const char *Name = nullptr;
+  switch (Format) {
+  case Format::R16Sint:
+    Name = "R16Sint";
+    break;
+  case Format::R16Uint:
+    Name = "R16Uint";
+    break;
+  case Format::RG16Sint:
+    Name = "RG16Sint";
+    break;
+  case Format::RG16Uint:
+    Name = "RG16Uint";
+    break;
+  case Format::RGBA16Sint:
+    Name = "RGBA16Sint";
+    break;
+  case Format::RGBA16Uint:
+    Name = "RGBA16Uint";
+    break;
+  case Format::R32Sint:
+    Name = "R32Sint";
+    break;
+  case Format::R32Uint:
+    Name = "R32Uint";
+    break;
+  case Format::R32Float:
+    Name = "R32Float";
+    break;
+  case Format::RG32Sint:
+    Name = "RG32Sint";
+    break;
+  case Format::RG32Uint:
+    Name = "RG32Uint";
+    break;
+  case Format::RG32Float:
+    Name = "RG32Float";
+    break;
+  case Format::RGB32Float:
+    Name = "RGB32Float";
+    break;
+  case Format::RGBA32Sint:
+    Name = "RGBA32Sint";
+    break;
+  case Format::RGBA32Uint:
+    Name = "RGBA32Uint";
+    break;
+  case Format::RGBA32Float:
+    Name = "RGBA32Float";
+    break;
+  case Format::D32Float:
+    Name = "D32Float";
+    break;
+  case Format::D32FloatS8Uint:
+    Name = "D32FloatS8Uint";
+    break;
+  }
+  // Name is only left unset when Format holds a value outside the enum.
+  if (Name != nullptr)
+    return Name;
+  llvm_unreachable("All Format cases handled");
+}
+
+// Returns the size in bytes of a single texel/element for the given format.
+// Sizes are the channel count times the bytes per channel; D32FloatS8Uint is
+// reported as 8 bytes.
+inline uint32_t getFormatSize(Format Format) {
+  constexpr uint32_t Bytes16 = 2; // Bytes in one 16-bit channel.
+  constexpr uint32_t Bytes32 = 4; // Bytes in one 32-bit channel.
+  switch (Format) {
+  case Format::R16Sint:
+  case Format::R16Uint:
+    return 1 * Bytes16;
+  case Format::RG16Sint:
+  case Format::RG16Uint:
+    return 2 * Bytes16;
+  case Format::RGBA16Sint:
+  case Format::RGBA16Uint:
+    return 4 * Bytes16;
+  case Format::R32Sint:
+  case Format::R32Uint:
+  case Format::R32Float:
+  case Format::D32Float:
+    return 1 * Bytes32;
+  case Format::RG32Sint:
+  case Format::RG32Uint:
+  case Format::RG32Float:
+    return 2 * Bytes32;
+  case Format::RGB32Float:
+    return 3 * Bytes32;
+  case Format::RGBA32Sint:
+  case Format::RGBA32Uint:
+  case Format::RGBA32Float:
+    return 4 * Bytes32;
+  case Format::D32FloatS8Uint:
+    // Depth plus stencil together occupy two 32-bit words.
+    return 2 * Bytes32;
+  }
+  llvm_unreachable("All Format cases handled");
+}
+
+// Returns true for the two depth formats (D32Float and D32FloatS8Uint) and
+// false for every color format.
+inline bool isDepthFormat(Format Format) {
+  switch (Format) {
+  case Format::D32Float:
+  case Format::D32FloatS8Uint:
+    return true;
+  case Format::R16Sint:
+  case Format::R16Uint:
+  case Format::RG16Sint:
+  case Format::RG16Uint:
+  case Format::RGBA16Sint:
+  case Format::RGBA16Uint:
+  case Format::R32Sint:
+  case Format::R32Uint:
+  case Format::R32Float:
+  case Format::RG32Sint:
+  case Format::RG32Uint:
+  case Format::RG32Float:
+  case Format::RGB32Float:
+  case Format::RGBA32Sint:
+  case Format::RGBA32Uint:
+  case Format::RGBA32Float:
+    return false;
+  }
+  llvm_unreachable("All Format cases handled");
+}
+
+// Returns true if the format can be used as a texture pixel format across all
+// backends. Formats like RGB32Float are valid for vertex attributes but have no
+// pixel format equivalent on some APIs (e.g. Metal).
+inline bool isTextureCompatible(Format Format) {
+  switch (Format) {
+  case Format::R16Sint:
+  case Format::R16Uint:
+  case Format::RG16Sint:
+  case Format::RG16Uint:
+  case Format::RGBA16Sint:
+  case Format::RGBA16Uint:
+  case Format::R32Sint:
+  case Format::R32Uint:
+  case Format::R32Float:
+  case Format::RG32Sint:
+  case Format::RG32Uint:
+  case Format::RG32Float:
+  case Format::RGBA32Sint:
+  case Format::RGBA32Uint:
+  case Format::RGBA32Float:
+  case Format::D32Float:
+  case Format::D32FloatS8Uint:
+    return true;
+  // The only format rejected for textures is the three-channel float layout.
+  case Format::RGB32Float:
+    return false;
+  }
+  llvm_unreachable("All Format cases handled");
+}
+
+// Returns true if the format may describe a vertex attribute: every color
+// format qualifies, the two depth formats do not.
+inline bool isVertexCompatible(Format Format) {
+  switch (Format) {
+  case Format::D32Float:
+  case Format::D32FloatS8Uint:
+    return false;
+  case Format::R16Sint:
+  case Format::R16Uint:
+  case Format::RG16Sint:
+  case Format::RG16Uint:
+  case Format::RGBA16Sint:
+  case Format::RGBA16Uint:
+  case Format::R32Sint:
+  case Format::R32Uint:
+  case Format::R32Float:
+  case Format::RG32Sint:
+  case Format::RG32Uint:
+  case Format::RG32Float:
+  case Format::RGB32Float:
+  case Format::RGBA32Sint:
+  case Format::RGBA32Uint:
+  case Format::RGBA32Float:
+    return true;
+  }
+  llvm_unreachable("All Format cases handled");
+}
+
+// Returns true if the format can be used as a BLAS position attribute for
+// raytracing acceleration structure builds. Only a small subset of floating
+// point formats are supported across DX12, Vulkan, and Metal.
+// Accepts exactly RG32Float, RGB32Float and RGBA32Float.
+// NOTE(review): confirm that RGBA32Float is really accepted as an
+// acceleration-structure vertex format by every backend listed above.
+inline bool isPositionCompatible(Format Format) {
+  switch (Format) {
+  // Integer formats, single-channel float and depth formats are rejected.
+  case Format::R16Sint:
+  case Format::R16Uint:
+  case Format::RG16Sint:
+  case Format::RG16Uint:
+  case Format::RGBA16Sint:
+  case Format::RGBA16Uint:
+  case Format::R32Sint:
+  case Format::R32Uint:
+  case Format::R32Float:
+  case Format::RG32Sint:
+  case Format::RG32Uint:
+  case Format::RGBA32Sint:
+  case Format::RGBA32Uint:
+  case Format::D32Float:
+  case Format::D32FloatS8Uint:
+    return false;
+  // Multi-channel 32-bit float formats are accepted.
+  case Format::RG32Float:
+  case Format::RGB32Float:
+  case Format::RGBA32Float:
+    return true;
+  }
+  llvm_unreachable("All Format cases handled");
+}
+
+} // namespace offloadtest
+
+#endif // OFFLOADTEST_API_RESOURCES_H
diff --git a/include/API/Texture.h b/include/API/Texture.h
new file mode 100644
index 000000000..bb4ee2b49
--- /dev/null
+++ b/include/API/Texture.h
@@ -0,0 +1,155 @@
+//===- Texture.h - Offload API Texture ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OFFLOADTEST_API_TEXTURE_H
+#define OFFLOADTEST_API_TEXTURE_H
+
+#include "API/Resources.h"
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include <cstdint>
+#include <optional>
+#include <string>
+#include <variant>
+
+namespace offloadtest {
+
+// Bit flags describing how a texture will be used; values may be OR-ed
+// together.
+enum TextureUsage : uint32_t {
+  Sampled = 1 << 0,
+  Storage = 1 << 1,
+  RenderTarget = 1 << 2,
+  DepthStencil = 1 << 3,
+  LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ DepthStencil)
+};
+
+// Returns the set flags joined with '|' (e.g. "Sampled|RenderTarget"), or an
+// empty string when no flag is set.
+inline std::string getTextureUsageName(TextureUsage Usage) {
+  std::string Result;
+  if ((Usage & Sampled) != 0)
+    Result += "Sampled|";
+  if ((Usage & Storage) != 0)
+    Result += "Storage|";
+  if ((Usage & RenderTarget) != 0)
+    Result += "RenderTarget|";
+  if ((Usage & DepthStencil) != 0)
+    Result += "DepthStencil|";
+  if (!Result.empty())
+    Result.pop_back(); // Remove trailing '|'
+  return Result;
+}
+
+// Clear value for a render target; defaults to all zeros.
+struct ClearColor {
+  float R = 0.0f, G = 0.0f, B = 0.0f, A = 0.0f;
+};
+
+// Clear value for a depth/stencil texture; defaults to depth 1.0, stencil 0.
+struct ClearDepthStencil {
+  float Depth = 1.0f;
+  uint8_t Stencil = 0;
+};
+
+using ClearValue = std::variant<ClearColor, ClearDepthStencil>;
+
+// TODO: Currently only 2D textures are supported. When expanding to 1D, 3D,
+// cube, or array textures, add a TextureType enum and validation between usage
+// and type (e.g. 3D textures cannot be used as DepthStencil).
+struct TextureCreateDesc {
+  MemoryLocation Location;
+  TextureUsage Usage;
+  Format Format;
+  uint32_t Width;
+  uint32_t Height;
+  uint32_t MipLevels;
+  // Clear value for render target or depth/stencil textures.
+  // How and when this is applied depends on the backend:
+  //   - DX uses it as an optimized clear hint at resource creation time
+  //   - VK and MTL apply it at render pass begin
+  std::optional<ClearValue> OptimizedClearValue;
+};
+
+// Checks that a TextureCreateDesc is internally consistent before a backend
+// tries to create the texture. Returns the first violation found:
+//   - the format must be texture compatible and the size non-zero
+//   - DepthStencil excludes RenderTarget and (for now) Storage
+//   - depth formats and DepthStencil usage must go together
+//   - render target and depth/stencil textures need exactly one mip level
+//   - a clear value needs RenderTarget or DepthStencil usage and the matching
+//     variant alternative
+inline llvm::Error validateTextureCreateDesc(const TextureCreateDesc &Desc) {
+  if (!isTextureCompatible(Desc.Format))
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "Format '%s' is not compatible with texture creation.",
+        getFormatName(Desc.Format).data());
+
+  // Reject empty textures here so the caller gets a precise message instead
+  // of a generic backend creation failure.
+  if (Desc.Width == 0 || Desc.Height == 0)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "Texture dimensions must be non-zero, got %ux%u.", Desc.Width,
+        Desc.Height);
+
+  const bool IsDepth = isDepthFormat(Desc.Format);
+  const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0;
+  const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0;
+
+  // DepthStencil + RenderTarget is not supported.
+  if (IsDS && IsRT)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "DepthStencil and RenderTarget are mutually exclusive.");
+  // DepthStencil + Storage is a valid but discouraged configuration (poor
+  // performance on most hardware). Not supported for now.
+  if (IsDS && (Desc.Usage & TextureUsage::Storage) != 0)
+    return llvm::createStringError(
+        std::errc::not_supported,
+        "DepthStencil combined with Storage is not yet supported.");
+
+  // Depth formats require DepthStencil usage; non-depth formats forbid it.
+  if (IsDepth && !IsDS)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "Depth format '%s' requires DepthStencil usage.",
+        getFormatName(Desc.Format).data());
+  if (!IsDepth && IsDS)
+    return llvm::createStringError(
+        std::errc::invalid_argument,
+        "DepthStencil usage requires a depth format, got '%s'.",
+        getFormatName(Desc.Format).data());
+
+  // Render targets and depth/stencil textures only support a single mip level.
+  if ((IsRT || IsDS) && Desc.MipLevels != 1)
+    return llvm::createStringError(
+        std::errc::not_supported,
+        "Multiple mip levels are not supported for render target or "
+        "depth/stencil textures.");
+
+  // A clear value requires RenderTarget or DepthStencil usage, and the
+  // variant must match.
+  if (Desc.OptimizedClearValue) {
+    if (!IsRT && !IsDS)
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "OptimizedClearValue requires RenderTarget or DepthStencil usage.");
+    if (IsRT && !std::holds_alternative<ClearColor>(*Desc.OptimizedClearValue))
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "RenderTarget usage requires a ClearColor clear value.");
+    if (IsDS &&
+        !std::holds_alternative<ClearDepthStencil>(*Desc.OptimizedClearValue))
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "DepthStencil usage requires a ClearDepthStencil clear value.");
+  }
+
+  return llvm::Error::success();
+}
+
+// Backend-agnostic texture handle. Non-copyable; only derived classes can
+// construct it.
+class Texture {
+public:
+  virtual ~Texture() = default;
+
+  Texture(const Texture &) = delete;
+  Texture &operator=(const Texture &) = delete;
+
+protected:
+  Texture() = default;
+};
+
+} // namespace offloadtest
+
+#endif // OFFLOADTEST_API_TEXTURE_H
diff --git a/lib/API/DX/DXResources.h b/lib/API/DX/DXResources.h
new file mode 100644
index 000000000..3aa6f35e1
--- /dev/null
+++ b/lib/API/DX/DXResources.h
@@ -0,0 +1,89 @@
+//===- DXResources.h - DirectX Resource Helpers ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_DXRESOURCES_H +#define OFFLOADTEST_API_DXRESOURCES_H + +#include "API/Device.h" + +#include +#include + +namespace offloadtest { + +inline D3D12_HEAP_TYPE getDXHeapType(MemoryLocation Location) { + switch (Location) { + case MemoryLocation::GpuOnly: + return D3D12_HEAP_TYPE_DEFAULT; + case MemoryLocation::CpuToGpu: + return D3D12_HEAP_TYPE_UPLOAD; + case MemoryLocation::GpuToCpu: + return D3D12_HEAP_TYPE_READBACK; + } + llvm_unreachable("All MemoryLocation cases handled"); +} + +inline DXGI_FORMAT getDXGIFormat(Format Format) { + switch (Format) { + case Format::R16Sint: + return DXGI_FORMAT_R16_SINT; + case Format::R16Uint: + return DXGI_FORMAT_R16_UINT; + case Format::RG16Sint: + return DXGI_FORMAT_R16G16_SINT; + case Format::RG16Uint: + return DXGI_FORMAT_R16G16_UINT; + case Format::RGBA16Sint: + return DXGI_FORMAT_R16G16B16A16_SINT; + case Format::RGBA16Uint: + return DXGI_FORMAT_R16G16B16A16_UINT; + case Format::R32Sint: + return DXGI_FORMAT_R32_SINT; + case Format::R32Uint: + return DXGI_FORMAT_R32_UINT; + case Format::R32Float: + return DXGI_FORMAT_R32_FLOAT; + case Format::RG32Sint: + return DXGI_FORMAT_R32G32_SINT; + case Format::RG32Uint: + return DXGI_FORMAT_R32G32_UINT; + case Format::RG32Float: + return DXGI_FORMAT_R32G32_FLOAT; + case Format::RGB32Float: + return DXGI_FORMAT_R32G32B32_FLOAT; + case Format::RGBA32Sint: + return DXGI_FORMAT_R32G32B32A32_SINT; + case Format::RGBA32Uint: + return DXGI_FORMAT_R32G32B32A32_UINT; + case Format::RGBA32Float: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + case Format::D32Float: + return DXGI_FORMAT_D32_FLOAT; + case Format::D32FloatS8Uint: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + } + llvm_unreachable("All Format cases handled"); +} + +inline 
D3D12_RESOURCE_FLAGS getDXResourceFlags(TextureUsage Usage) { + D3D12_RESOURCE_FLAGS Flags = D3D12_RESOURCE_FLAG_NONE; + if ((Usage & TextureUsage::Storage) != 0) + Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + if ((Usage & TextureUsage::RenderTarget) != 0) + Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + if ((Usage & TextureUsage::DepthStencil) != 0) + Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + return Flags; +} + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_DXRESOURCES_H diff --git a/lib/API/DX/Device.cpp b/lib/API/DX/Device.cpp index 2eec8b777..2049c9fca 100644 --- a/lib/API/DX/Device.cpp +++ b/lib/API/DX/Device.cpp @@ -37,6 +37,8 @@ #include "Support/Pipeline.h" #include "Support/WinError.h" +#include "DXResources.h" + #include "llvm/ADT/SmallVector.h" #include "llvm/Object/DXContainer.h" #include "llvm/Support/Error.h" @@ -293,6 +295,27 @@ class DXBuffer : public offloadtest::Buffer { : Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {} }; +class DXTexture : public offloadtest::Texture { +public: + ComPtr Resource; + // TODO: + // RTV/DSV views own a dedicated single-descriptor heap and are created at + // texture creation time. Ideally SRV/UAV views would also live here, but + // they currently require a shared CBV_SRV_UAV heap whose indices are + // determined at pipeline bind time. Moving them here would require a + // descriptor heap allocator, which is not yet implemented. + // + // Either an RTV or DSV descriptor, depending on Desc.Usage. + ComPtr ViewHeap; + D3D12_CPU_DESCRIPTOR_HANDLE ViewHandle = {}; + std::string Name; + TextureCreateDesc Desc; + + DXTexture(ComPtr Resource, llvm::StringRef Name, + TextureCreateDesc Desc) + : Resource(Resource), Name(Name), Desc(Desc) {} +}; + class DXQueue : public offloadtest::Queue { public: ComPtr Queue; @@ -354,9 +377,9 @@ class DXDevice : public offloadtest::Device { #endif // Resources for graphics pipelines. 
- ComPtr RT; - ComPtr RTReadback; - ComPtr RTVHeap; + std::shared_ptr RT; + std::shared_ptr RTReadback; + std::shared_ptr DS; ComPtr VB; llvm::SmallVector DescTables; @@ -381,38 +404,116 @@ class DXDevice : public offloadtest::Device { llvm::Expected> createBuffer(std::string Name, BufferCreateDesc &Desc, size_t SizeInBytes) override { + const D3D12_HEAP_TYPE HeapType = getDXHeapType(Desc.Location); - D3D12_HEAP_TYPE HeapType = D3D12_HEAP_TYPE_DEFAULT; - switch (Desc.Location) { - case MemoryLocation::GpuOnly: - HeapType = D3D12_HEAP_TYPE_DEFAULT; - break; - case MemoryLocation::CpuToGpu: - HeapType = D3D12_HEAP_TYPE_UPLOAD; - break; - case MemoryLocation::GpuToCpu: - HeapType = D3D12_HEAP_TYPE_READBACK; - break; - } - + // Readback heaps do not support UAV access. const D3D12_RESOURCE_FLAGS Flags = - D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + HeapType == D3D12_HEAP_TYPE_READBACK + ? D3D12_RESOURCE_FLAG_NONE + : D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(HeapType); const D3D12_RESOURCE_DESC BufferDesc = CD3DX12_RESOURCE_DESC::Buffer(SizeInBytes, Flags); + D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON; + if (HeapType == D3D12_HEAP_TYPE_UPLOAD) + InitialState = D3D12_RESOURCE_STATE_GENERIC_READ; + else if (HeapType == D3D12_HEAP_TYPE_READBACK) + InitialState = D3D12_RESOURCE_STATE_COPY_DEST; + ComPtr DeviceBuffer; if (auto Err = HR::toError(Device->CreateCommittedResource( &HeapProps, D3D12_HEAP_FLAG_NONE, - &BufferDesc, D3D12_RESOURCE_STATE_COMMON, - nullptr, IID_PPV_ARGS(&DeviceBuffer)), + &BufferDesc, InitialState, nullptr, + IID_PPV_ARGS(&DeviceBuffer)), "Failed to create buffer.")) return Err; return std::make_shared(DeviceBuffer, Name, Desc, SizeInBytes); } + llvm::Expected> + createTexture(std::string Name, TextureCreateDesc &Desc) override { + if (auto Err = validateTextureCreateDesc(Desc)) + return Err; + + const D3D12_HEAP_PROPERTIES HeapProps = + 
CD3DX12_HEAP_PROPERTIES(getDXHeapType(Desc.Location)); + + D3D12_RESOURCE_DESC TexDesc = {}; + TexDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + TexDesc.Width = Desc.Width; + TexDesc.Height = Desc.Height; + TexDesc.DepthOrArraySize = 1; + TexDesc.MipLevels = static_cast(Desc.MipLevels); + TexDesc.Format = getDXGIFormat(Desc.Format); + TexDesc.SampleDesc.Count = 1; + TexDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + TexDesc.Flags = getDXResourceFlags(Desc.Usage); + + const D3D12_CLEAR_VALUE *ClearValuePtr = nullptr; + D3D12_CLEAR_VALUE ClearValue = {}; + if (Desc.OptimizedClearValue) { + ClearValue.Format = TexDesc.Format; + std::visit( + [&ClearValue](auto &&V) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + ClearValue.Color[0] = V.R; + ClearValue.Color[1] = V.G; + ClearValue.Color[2] = V.B; + ClearValue.Color[3] = V.A; + } else { + ClearValue.DepthStencil.Depth = V.Depth; + ClearValue.DepthStencil.Stencil = V.Stencil; + } + }, + *Desc.OptimizedClearValue); + ClearValuePtr = &ClearValue; + } + + D3D12_RESOURCE_STATES InitialState = D3D12_RESOURCE_STATE_COMMON; + if ((Desc.Usage & TextureUsage::RenderTarget) != 0) + InitialState = D3D12_RESOURCE_STATE_RENDER_TARGET; + else if ((Desc.Usage & TextureUsage::DepthStencil) != 0) + InitialState = D3D12_RESOURCE_STATE_DEPTH_WRITE; + + ComPtr DeviceTexture; + if (auto Err = HR::toError(Device->CreateCommittedResource( + &HeapProps, D3D12_HEAP_FLAG_NONE, &TexDesc, + InitialState, ClearValuePtr, + IID_PPV_ARGS(&DeviceTexture)), + "Failed to create texture.")) + return Err; + + auto Tex = std::make_shared(DeviceTexture, Name, Desc); + + const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0; + const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0; + if (IsRT || IsDS) { + D3D12_DESCRIPTOR_HEAP_DESC HeapDesc = {}; + HeapDesc.NumDescriptors = 1; + HeapDesc.Type = IsRT ? 
D3D12_DESCRIPTOR_HEAP_TYPE_RTV + : D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + HeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if (auto Err = HR::toError(Device->CreateDescriptorHeap( + &HeapDesc, IID_PPV_ARGS(&Tex->ViewHeap)), + IsRT ? "Failed to create RTV heap." + : "Failed to create DSV heap.")) + return Err; + Tex->ViewHandle = Tex->ViewHeap->GetCPUDescriptorHandleForHeapStart(); + if (IsRT) + Device->CreateRenderTargetView(DeviceTexture.Get(), nullptr, + Tex->ViewHandle); + else + Device->CreateDepthStencilView(DeviceTexture.Get(), nullptr, + Tex->ViewHandle); + } + + return Tex; + } + static llvm::Expected> create(ComPtr Adapter, const DeviceConfig &Config) { ComPtr Device; @@ -1361,7 +1462,7 @@ class DXDevice : public offloadtest::Device { return Err; // If there is no render target, return early. - if (IS.RTReadback == nullptr) + if (!IS.RTReadback) return llvm::Error::success(); // Map readback and copy into host buffer, accounting for row pitch and @@ -1369,13 +1470,13 @@ class DXDevice : public offloadtest::Device { // while our image writer expects bottom-left. const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; void *Mapped = nullptr; - if (auto Err = HR::toError(IS.RTReadback->Map(0, nullptr, &Mapped), + if (auto Err = HR::toError(IS.RTReadback->Buffer->Map(0, nullptr, &Mapped), "Failed to map render target readback")) return Err; // Query the copy footprint to get the actual padded row pitch used by the // copy operation. 
- const D3D12_RESOURCE_DESC RTDesc = IS.RT->GetDesc(); + const D3D12_RESOURCE_DESC RTDesc = IS.RT->Resource->GetDesc(); D3D12_PLACED_SUBRESOURCE_FOOTPRINT Placed = {}; uint32_t NumRows = 0; uint64_t RowSizeInBytes = 0; @@ -1400,7 +1501,7 @@ class DXDevice : public offloadtest::Device { memcpy(DstRow, SrcRow, RowBytes); } - IS.RTReadback->Unmap(0, nullptr); + IS.RTReadback->Buffer->Unmap(0, nullptr); return llvm::Error::success(); } @@ -1410,51 +1511,31 @@ class DXDevice : public offloadtest::Device { std::errc::invalid_argument, "No render target bound for graphics pipeline."); const CPUBuffer &OutBuf = *P.Bindings.RTargetBufferPtr; - if (OutBuf.OutputProps.MipLevels != 1) - return llvm::createStringError( - std::errc::not_supported, - "Multiple mip levels are not yet supported for DirectX render " - "targets."); - D3D12_RESOURCE_DESC Desc = {}; - Desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; - Desc.Width = OutBuf.OutputProps.Width; - Desc.Height = OutBuf.OutputProps.Height; - Desc.DepthOrArraySize = 1; - Desc.MipLevels = 1; - Desc.Format = getDXFormat(OutBuf.Format, OutBuf.Channels); - Desc.SampleDesc.Count = 1; - Desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - Desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; - D3D12_CLEAR_VALUE ClearValue = {}; - ClearValue.Format = Desc.Format; - ClearValue.Color[0] = 0.0f; - ClearValue.Color[1] = 0.0f; - ClearValue.Color[2] = 0.0f; - ClearValue.Color[3] = 0.0f; + auto TexOrErr = offloadtest::createRenderTargetFromCPUBuffer(*this, OutBuf); + if (!TexOrErr) + return TexOrErr.takeError(); - CD3DX12_HEAP_PROPERTIES HeapProps = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); - if (auto Err = HR::toError(Device->CreateCommittedResource( - &HeapProps, D3D12_HEAP_FLAG_NONE, &Desc, - D3D12_RESOURCE_STATE_RENDER_TARGET, - &ClearValue, IID_PPV_ARGS(&IS.RT)), - "Failed to create render target")) - return Err; + IS.RT = std::static_pointer_cast(*TexOrErr); // Create readback buffer sized for the pixel data (raw bytes). 
- const uint64_t RBSize = static_cast(OutBuf.size()); - D3D12_RESOURCE_DESC const RbDesc = CD3DX12_RESOURCE_DESC::Buffer(RBSize); - CD3DX12_HEAP_PROPERTIES RbHeap = - CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK); - if (auto Err = - HR::toError(Device->CreateCommittedResource( - &RbHeap, D3D12_HEAP_FLAG_NONE, &RbDesc, - D3D12_RESOURCE_STATE_COPY_DEST, nullptr, - IID_PPV_ARGS(&IS.RTReadback)), - "Failed to create render target readback buffer")) - return Err; + BufferCreateDesc BufDesc = {}; + BufDesc.Location = MemoryLocation::GpuToCpu; + auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size()); + if (!BufOrErr) + return BufOrErr.takeError(); + IS.RTReadback = std::static_pointer_cast(*BufOrErr); + + return llvm::Error::success(); + } + llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( + *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, + P.Bindings.RTargetBufferPtr->OutputProps.Height); + if (!TexOrErr) + return TexOrErr.takeError(); + IS.DS = std::static_pointer_cast(*TexOrErr); return llvm::Error::success(); } @@ -1533,8 +1614,11 @@ class DXDevice : public offloadtest::Device { PSODesc.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); PSODesc.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); - PSODesc.DepthStencilState.DepthEnable = false; + PSODesc.DepthStencilState.DepthEnable = true; + PSODesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + PSODesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS; PSODesc.DepthStencilState.StencilEnable = false; + PSODesc.DSVFormat = getDXGIFormat(IS.DS->Desc.Format); PSODesc.SampleMask = UINT_MAX; PSODesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; PSODesc.NumRenderTargets = 1; @@ -1551,21 +1635,6 @@ class DXDevice : public offloadtest::Device { } llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { - // Create descriptor heap for the render target view. 
We do this later and - // separately from other descriptors just as a convenience since we need the - // descriptor handle to bind the render target. - D3D12_DESCRIPTOR_HEAP_DESC RTVHeapDesc = {}; - RTVHeapDesc.NumDescriptors = 1; - RTVHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - RTVHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - if (auto Err = HR::toError(Device->CreateDescriptorHeap( - &RTVHeapDesc, IID_PPV_ARGS(&IS.RTVHeap)), - "Failed to create RTV heap")) - return Err; - const D3D12_CPU_DESCRIPTOR_HANDLE RTVHandle = - IS.RTVHeap->GetCPUDescriptorHandleForHeapStart(); - Device->CreateRenderTargetView(IS.RT.Get(), nullptr, RTVHandle); - IS.CmdList->SetGraphicsRootSignature(IS.RootSig.Get()); if (IS.DescHeap) { ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()}; @@ -1575,7 +1644,18 @@ class DXDevice : public offloadtest::Device { } IS.CmdList->SetPipelineState(IS.PSO.Get()); - IS.CmdList->OMSetRenderTargets(1, &RTVHandle, false, nullptr); + IS.CmdList->OMSetRenderTargets(1, &IS.RT->ViewHandle, false, + &IS.DS->ViewHandle); + + const auto *DepthCV = + std::get_if(&*IS.DS->Desc.OptimizedClearValue); + if (!DepthCV) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth/stencil clear value must be a ClearDepthStencil."); + IS.CmdList->ClearDepthStencilView( + IS.DS->ViewHandle, D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, + DepthCV->Depth, DepthCV->Stencil, 0, nullptr); D3D12_VIEWPORT VP = {}; VP.Width = @@ -1596,7 +1676,7 @@ class DXDevice : public offloadtest::Device { // Transition the render target to copy source and copy to the readback // buffer. 
const D3D12_RESOURCE_BARRIER Barrier = CD3DX12_RESOURCE_BARRIER::Transition( - IS.RT.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, + IS.RT->Resource.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); IS.CmdList->ResourceBarrier(1, &Barrier); @@ -1606,8 +1686,9 @@ class DXDevice : public offloadtest::Device { CD3DX12_SUBRESOURCE_FOOTPRINT( getDXFormat(B.Format, B.Channels), B.OutputProps.Width, B.OutputProps.Height, 1, B.OutputProps.Width * B.getElementSize())}; - const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback.Get(), Footprint); - const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT.Get(), 0); + const CD3DX12_TEXTURE_COPY_LOCATION DstLoc(IS.RTReadback->Buffer.Get(), + Footprint); + const CD3DX12_TEXTURE_COPY_LOCATION SrcLoc(IS.RT->Resource.Get(), 0); IS.CmdList->CopyTextureRegion(&DstLoc, 0, 0, 0, &SrcLoc, nullptr); @@ -1712,10 +1793,16 @@ class DXDevice : public offloadtest::Device { llvm::outs() << "Compute command list created.\n"; } else { - // Create render target, readback and vertex buffer and PSO. + // Create render target, depth/stencil, readback and vertex buffer and + // PSO. if (auto Err = createRenderTarget(P, State)) return Err; llvm::outs() << "Render target created.\n"; + // TODO: Always created for graphics pipelines. Consider making this + // conditional on the pipeline definition. 
+ if (auto Err = createDepthStencil(P, State)) + return Err; + llvm::outs() << "Depth stencil created.\n"; if (auto Err = createVertexBuffer(P, State)) return Err; llvm::outs() << "Vertex buffer created.\n"; diff --git a/lib/API/Device.cpp b/lib/API/Device.cpp index fc1044884..6310a4fdb 100644 --- a/lib/API/Device.cpp +++ b/lib/API/Device.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "API/Device.h" +#include "API/FormatConversion.h" #include "Config.h" @@ -45,3 +46,40 @@ offloadtest::initializeDevices(const DeviceConfig Config) { return Devices; } + +llvm::Expected> +offloadtest::createRenderTargetFromCPUBuffer(Device &Dev, + const CPUBuffer &Buf) { + auto TexFmtOrErr = toFormat(Buf.Format, Buf.Channels); + if (!TexFmtOrErr) + return TexFmtOrErr.takeError(); + + TextureCreateDesc Desc = {}; + Desc.Location = MemoryLocation::GpuOnly; + Desc.Usage = TextureUsage::RenderTarget; + Desc.Format = *TexFmtOrErr; + Desc.Width = Buf.OutputProps.Width; + Desc.Height = Buf.OutputProps.Height; + Desc.MipLevels = 1; + Desc.OptimizedClearValue = ClearColor{}; + + if (auto Err = validateTextureDescMatchesCPUBuffer(Desc, Buf)) + return Err; + + return Dev.createTexture("RenderTarget", Desc); +} + +llvm::Expected> +offloadtest::createDefaultDepthStencilTarget(Device &Dev, uint32_t Width, + uint32_t Height) { + TextureCreateDesc Desc = {}; + Desc.Location = MemoryLocation::GpuOnly; + Desc.Usage = TextureUsage::DepthStencil; + Desc.Format = Format::D32FloatS8Uint; + Desc.Width = Width; + Desc.Height = Height; + Desc.MipLevels = 1; + Desc.OptimizedClearValue = ClearDepthStencil{1.0f, 0}; + + return Dev.createTexture("DepthStencil", Desc); +} diff --git a/lib/API/MTL/MTLDevice.cpp b/lib/API/MTL/MTLDevice.cpp index 0c26f7c7a..e03a3beb3 100644 --- a/lib/API/MTL/MTLDevice.cpp +++ b/lib/API/MTL/MTLDevice.cpp @@ -10,6 +10,7 @@ #include "metal_irconverter_runtime.h" #include "API/Device.h" +#include "MTLResources.h" #include 
"Support/Pipeline.h" #include "llvm/ADT/SmallString.h" @@ -100,6 +101,21 @@ class MTLBuffer : public offloadtest::Buffer { } }; +class MTLTexture : public offloadtest::Texture { +public: + MTL::Texture *Tex; + std::string Name; + TextureCreateDesc Desc; + + MTLTexture(MTL::Texture *Tex, llvm::StringRef Name, TextureCreateDesc Desc) + : Tex(Tex), Name(Name), Desc(Desc) {} + + ~MTLTexture() override { + if (Tex) + Tex->release(); + } +}; + class MTLDevice : public offloadtest::Device { Capabilities Caps; MTL::Device *Device; @@ -128,7 +144,9 @@ class MTLDevice : public offloadtest::Device { MTL::VertexDescriptor *VertexDescriptor; llvm::SmallVector Textures; llvm::SmallVector Buffers; - MTL::Texture *FrameBufferTexture = nullptr; + std::shared_ptr FrameBufferTexture; + std::shared_ptr FrameBufferReadback; + std::shared_ptr DepthStencil; MTL::CommandBuffer *CmdBuffer = nullptr; }; @@ -266,6 +284,12 @@ class MTLDevice : public offloadtest::Device { MTL::RenderPipelineColorAttachmentDescriptor::alloc()->init(); RPCA->setPixelFormat(PF); Desc->colorAttachments()->setObject(RPCA, 0); + + // Set the depth/stencil format on the pipeline descriptor. 
+ const MTL::PixelFormat DepthFmt = + getMetalPixelFormat(Format::D32FloatS8Uint); + Desc->setDepthAttachmentPixelFormat(DepthFmt); + Desc->setStencilAttachmentPixelFormat(DepthFmt); } IS.RenderPipeline = Device->newRenderPipelineState(Desc, &Error); @@ -347,7 +371,7 @@ class MTLDevice : public offloadtest::Device { if (TableSize > 0) { IS.ArgBuffer = Device->newBuffer(TableSize, MTL::ResourceStorageModeManaged); - uint32_t HeapIndex = 0; + const uint32_t HeapIndex = 0; for (auto &D : P.Sets) { for (auto &R : D.Resources) { if (auto Err = createDescriptor(R, IS, HeapIndex++)) @@ -435,41 +459,107 @@ class MTLDevice : public offloadtest::Device { return llvm::Error::success(); } + llvm::Error createRenderTarget(Pipeline &P, InvocationState &IS) { + if (!P.Bindings.RTargetBufferPtr) + return llvm::createStringError( + std::errc::invalid_argument, + "No render target bound for graphics pipeline."); + const CPUBuffer &OutBuf = *P.Bindings.RTargetBufferPtr; + + auto TexOrErr = offloadtest::createRenderTargetFromCPUBuffer(*this, OutBuf); + if (!TexOrErr) + return TexOrErr.takeError(); + + IS.FrameBufferTexture = std::static_pointer_cast(*TexOrErr); + + // Create a readback buffer for copying render target data to the CPU. 
+ BufferCreateDesc BufDesc = {}; + BufDesc.Location = MemoryLocation::GpuToCpu; + auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size()); + if (!BufOrErr) + return BufOrErr.takeError(); + IS.FrameBufferReadback = std::static_pointer_cast(*BufOrErr); + + return llvm::Error::success(); + } + + llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( + *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, + P.Bindings.RTargetBufferPtr->OutputProps.Height); + if (!TexOrErr) + return TexOrErr.takeError(); + IS.DepthStencil = std::static_pointer_cast(*TexOrErr); + return llvm::Error::success(); + } + llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) { IS.CmdBuffer = GraphicsQueue.Queue->commandBuffer(); + if (auto Err = createRenderTarget(P, IS)) + return Err; + // TODO: Always created for graphics pipelines. Consider making this + // conditional on the pipeline definition. + if (auto Err = createDepthStencil(P, IS)) + return Err; + MTL::RenderPassDescriptor *Desc = MTL::RenderPassDescriptor::alloc()->init(); - // Setup the render target texture. - CPUBuffer *RTarget = P.Bindings.RTargetBufferPtr; - - const MTL::PixelFormat Format = - getMTLFormat(RTarget->Format, RTarget->Channels); - - const uint64_t Width = RTarget->OutputProps.Width; - const uint64_t Height = RTarget->OutputProps.Height; - MTL::TextureDescriptor *TDesc = MTL::TextureDescriptor::texture2DDescriptor( - Format, Width, Height, false); - // Create a shared texture used for both rendering and CPU readback. 
- MTL::TextureDescriptor *SharedDesc = TDesc->copy(); - SharedDesc->setUsage(MTL::TextureUsageRenderTarget | - MTL::TextureUsageShaderRead | - MTL::TextureUsageShaderWrite); - SharedDesc->setStorageMode(MTL::StorageModeShared); - IS.FrameBufferTexture = Device->newTexture(SharedDesc); + const uint64_t Width = P.Bindings.RTargetBufferPtr->OutputProps.Width; + const uint64_t Height = P.Bindings.RTargetBufferPtr->OutputProps.Height; + // Color attachment. auto *CADesc = MTL::RenderPassColorAttachmentDescriptor::alloc()->init(); - CADesc->setTexture(IS.FrameBufferTexture); + CADesc->setTexture(IS.FrameBufferTexture->Tex); CADesc->setLoadAction(MTL::LoadActionClear); - CADesc->setClearColor(MTL::ClearColor()); + const auto *ColorCV = std::get_if( + &*IS.FrameBufferTexture->Desc.OptimizedClearValue); + if (!ColorCV) + return llvm::createStringError( + std::errc::invalid_argument, + "Render target clear value must be a ClearColor."); + + CADesc->setClearColor( + MTL::ClearColor(ColorCV->R, ColorCV->G, ColorCV->B, ColorCV->A)); CADesc->setStoreAction(MTL::StoreActionStore); Desc->colorAttachments()->setObject(CADesc, 0); + // Depth/stencil attachment. 
+ const auto *DepthCV = std::get_if( + &*IS.DepthStencil->Desc.OptimizedClearValue); + if (!DepthCV) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth/stencil clear value must be a ClearDepthStencil."); + + auto *DADesc = Desc->depthAttachment(); + DADesc->setTexture(IS.DepthStencil->Tex); + DADesc->setLoadAction(MTL::LoadActionClear); + DADesc->setClearDepth(DepthCV->Depth); + DADesc->setStoreAction(MTL::StoreActionDontCare); + + auto *SADesc = Desc->stencilAttachment(); + SADesc->setTexture(IS.DepthStencil->Tex); + SADesc->setLoadAction(MTL::LoadActionClear); + SADesc->setClearStencil(DepthCV->Stencil); + SADesc->setStoreAction(MTL::StoreActionDontCare); + MTL::RenderCommandEncoder *CmdEncoder = IS.CmdBuffer->renderCommandEncoder(Desc); CmdEncoder->setRenderPipelineState(IS.RenderPipeline); + + // Configure depth stencil state: depth test enabled, write all, less. + MTL::DepthStencilDescriptor *DSDesc = + MTL::DepthStencilDescriptor::alloc()->init(); + DSDesc->setDepthCompareFunction(MTL::CompareFunctionLess); + DSDesc->setDepthWriteEnabled(true); + MTL::DepthStencilState *DSState = Device->newDepthStencilState(DSDesc); + CmdEncoder->setDepthStencilState(DSState); + DSDesc->release(); + DSState->release(); + // Explicitly set viewport to texture dimensions. CmdEncoder->setViewport( MTL::Viewport{0.0, 0.0, (double)Width, (double)Height, 0.0, 1.0}); @@ -484,6 +574,15 @@ class MTLDevice : public offloadtest::Device { CmdEncoder->endEncoding(); + // Blit the render target into the readback buffer for CPU access. 
+ MTL::BlitCommandEncoder *Blit = IS.CmdBuffer->blitCommandEncoder(); + const size_t ElemSize = RTarget->getElementSize(); + const size_t RowBytes = Width * ElemSize; + Blit->copyFromTexture(IS.FrameBufferTexture->Tex, 0, 0, + MTL::Origin(0, 0, 0), MTL::Size(Width, Height, 1), + IS.FrameBufferReadback->Buf, 0, RowBytes, 0); + Blit->endEncoding(); + return llvm::Error::success(); } @@ -500,8 +599,8 @@ class MTLDevice : public offloadtest::Device { } llvm::Error copyBack(Pipeline &P, InvocationState &IS) { - uint32_t TextureIndex = 0; - uint32_t BufferIndex = 0; + const uint32_t TextureIndex = 0; + const uint32_t BufferIndex = 0; for (auto &D : P.Sets) { for (auto &R : D.Resources) { assert(R.BufferPtr->ArraySize == 1 && @@ -534,16 +633,15 @@ class MTLDevice : public offloadtest::Device { const size_t ElemSize = RTarget->getElementSize(); const size_t RowBytes = Width * ElemSize; - // Read the framebuffer one row at a time into the output buffer. - // Read rows from the texture bottom-to-top into the buffer top-to-bottom - // so the final image is upright. + // Read from the readback buffer. The blit copied the texture data in + // GPU layout order, so we flip rows here to produce an upright image. 
+ const unsigned char *Src = reinterpret_cast( + IS.FrameBufferReadback->Buf->contents()); unsigned char *Buf = reinterpret_cast(RTarget->Data[0].get()); for (uint64_t R = 0; R < Height; ++R) { - const uint32_t SrcRow = (uint32_t)((Height - 1) - R); - unsigned char *Dst = Buf + R * RowBytes; - IS.FrameBufferTexture->getBytes( - Dst, RowBytes, MTL::Region(0, SrcRow, (uint32_t)Width, 1), 0); + const uint64_t SrcRow = (Height - 1) - R; + memcpy(Buf + R * RowBytes, Src + SrcRow * RowBytes, RowBytes); } } return llvm::Error::success(); @@ -568,24 +666,33 @@ class MTLDevice : public offloadtest::Device { llvm::Expected> createBuffer(std::string Name, BufferCreateDesc &Desc, size_t SizeInBytes) override { - MTL::ResourceOptions StorageMode; - switch (Desc.Location) { - case MemoryLocation::GpuOnly: - StorageMode = MTL::ResourceStorageModePrivate; - break; - case MemoryLocation::CpuToGpu: - case MemoryLocation::GpuToCpu: - StorageMode = MTL::ResourceStorageModeManaged; - break; - } - - MTL::Buffer *Buf = Device->newBuffer(SizeInBytes, StorageMode); + MTL::Buffer *Buf = Device->newBuffer( + SizeInBytes, getMetalBufferResourceOptions(Desc.Location)); if (!Buf) return llvm::createStringError(std::errc::not_enough_memory, "Failed to create Metal buffer."); return std::make_shared(Buf, Name, Desc, SizeInBytes); } + llvm::Expected> + createTexture(std::string Name, TextureCreateDesc &Desc) override { + if (auto Err = validateTextureCreateDesc(Desc)) + return Err; + + MTL::TextureDescriptor *TDesc = MTL::TextureDescriptor::texture2DDescriptor( + getMetalPixelFormat(Desc.Format), Desc.Width, Desc.Height, + Desc.MipLevels > 1); + TDesc->setMipmapLevelCount(Desc.MipLevels); + TDesc->setStorageMode(getMetalTextureStorageMode(Desc.Location)); + TDesc->setUsage(getMetalTextureUsage(Desc.Usage)); + + MTL::Texture *Tex = Device->newTexture(TDesc); + if (!Tex) + return llvm::createStringError(std::errc::not_enough_memory, + "Failed to create Metal texture."); + return 
std::make_shared(Tex, Name, Desc); + } + llvm::Error executeProgram(Pipeline &P) override { InvocationState IS; diff --git a/lib/API/MTL/MTLResources.h b/lib/API/MTL/MTLResources.h new file mode 100644 index 000000000..68d529ee3 --- /dev/null +++ b/lib/API/MTL/MTLResources.h @@ -0,0 +1,152 @@ +//===- MTLResources.h - Metal Resource Helpers ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_MTLRESOURCES_H +#define OFFLOADTEST_API_MTLRESOURCES_H + +#include "API/Device.h" + +#include "Metal/Metal.hpp" + +namespace offloadtest { + +// Metal requires different storage modes for textures and buffers. +// Textures use Managed for GpuToCpu because Shared textures are not available +// on discrete GPUs and lack hardware compression/tiling optimizations. +// Buffers use Shared for GpuToCpu because they are linear memory with no +// layout concerns, and Shared avoids the explicit synchronizeResource step +// that Managed requires. 
+inline MTL::StorageMode getMetalTextureStorageMode(MemoryLocation Location) { + switch (Location) { + case MemoryLocation::GpuOnly: + return MTL::StorageModePrivate; + case MemoryLocation::CpuToGpu: + case MemoryLocation::GpuToCpu: + return MTL::StorageModeManaged; + } + llvm_unreachable("All MemoryLocation cases handled"); +} + +inline MTL::ResourceOptions +getMetalBufferResourceOptions(MemoryLocation Location) { + switch (Location) { + case MemoryLocation::GpuOnly: + return MTL::ResourceStorageModePrivate; + case MemoryLocation::CpuToGpu: + return MTL::ResourceStorageModeManaged; + case MemoryLocation::GpuToCpu: + return MTL::ResourceStorageModeShared; + } + llvm_unreachable("All MemoryLocation cases handled"); +} + +inline MTL::PixelFormat getMetalPixelFormat(Format Format) { + switch (Format) { + case Format::R16Sint: + return MTL::PixelFormatR16Sint; + case Format::R16Uint: + return MTL::PixelFormatR16Uint; + case Format::RG16Sint: + return MTL::PixelFormatRG16Sint; + case Format::RG16Uint: + return MTL::PixelFormatRG16Uint; + case Format::RGBA16Sint: + return MTL::PixelFormatRGBA16Sint; + case Format::RGBA16Uint: + return MTL::PixelFormatRGBA16Uint; + case Format::R32Sint: + return MTL::PixelFormatR32Sint; + case Format::R32Uint: + return MTL::PixelFormatR32Uint; + case Format::R32Float: + return MTL::PixelFormatR32Float; + case Format::RG32Sint: + return MTL::PixelFormatRG32Sint; + case Format::RG32Uint: + return MTL::PixelFormatRG32Uint; + case Format::RG32Float: + return MTL::PixelFormatRG32Float; + // Metal has no 3-component pixel format. + // RGB32Float is only valid as a vertex format. 
+ case Format::RGB32Float: + llvm_unreachable("RGB32Float has no Metal pixel format equivalent"); + case Format::RGBA32Sint: + return MTL::PixelFormatRGBA32Sint; + case Format::RGBA32Uint: + return MTL::PixelFormatRGBA32Uint; + case Format::RGBA32Float: + return MTL::PixelFormatRGBA32Float; + case Format::D32Float: + return MTL::PixelFormatDepth32Float; + case Format::D32FloatS8Uint: + return MTL::PixelFormatDepth32Float_Stencil8; + } + llvm_unreachable("All Format cases handled"); +} + +inline MTL::TextureUsage getMetalTextureUsage(TextureUsage Usage) { + MTL::TextureUsage Flags = MTL::TextureUsageUnknown; + if ((Usage & Sampled) != 0) + Flags |= MTL::TextureUsageShaderRead; + if ((Usage & Storage) != 0) + Flags |= MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite; + if ((Usage & RenderTarget) != 0) + Flags |= MTL::TextureUsageRenderTarget; + if ((Usage & DepthStencil) != 0) + Flags |= MTL::TextureUsageRenderTarget; + return Flags; +} + +inline MTL::VertexFormat getMetalVertexFormat(Format Fmt) { + switch (Fmt) { + case Format::R16Sint: + return MTL::VertexFormatShort; + case Format::R16Uint: + return MTL::VertexFormatUShort; + case Format::RG16Sint: + return MTL::VertexFormatShort2; + case Format::RG16Uint: + return MTL::VertexFormatUShort2; + case Format::RGBA16Sint: + return MTL::VertexFormatShort4; + case Format::RGBA16Uint: + return MTL::VertexFormatUShort4; + case Format::R32Sint: + return MTL::VertexFormatInt; + case Format::R32Uint: + return MTL::VertexFormatUInt; + case Format::R32Float: + return MTL::VertexFormatFloat; + case Format::RG32Sint: + return MTL::VertexFormatInt2; + case Format::RG32Uint: + return MTL::VertexFormatUInt2; + case Format::RG32Float: + return MTL::VertexFormatFloat2; + case Format::RGB32Float: + return MTL::VertexFormatFloat3; + case Format::RGBA32Sint: + return MTL::VertexFormatInt4; + case Format::RGBA32Uint: + return MTL::VertexFormatUInt4; + case Format::RGBA32Float: + return MTL::VertexFormatFloat4; + // Depth 
formats cannot be used as vertex attributes. + case Format::D32Float: + case Format::D32FloatS8Uint: + llvm_unreachable("Depth formats are not valid vertex formats"); + } + llvm_unreachable("All Format cases handled"); +} + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_MTLRESOURCES_H diff --git a/lib/API/VK/Device.cpp b/lib/API/VK/Device.cpp index 913000253..6e0f14dd7 100644 --- a/lib/API/VK/Device.cpp +++ b/lib/API/VK/Device.cpp @@ -11,6 +11,7 @@ #include "API/Device.h" #include "Support/Pipeline.h" +#include "VKResources.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Error.h" @@ -391,6 +392,32 @@ class VulkanBuffer : public offloadtest::Buffer { } }; +class VulkanTexture : public offloadtest::Texture { +public: + VkDevice Dev; + VkImage Image; + VkDeviceMemory Memory; + // TODO: + // RenderTarget and DepthStencil views are created at texture creation time. + // Ideally Sampled/Storage image views would also live here, but they are + // currently created during descriptor set setup, which determines their + // binding layout. + VkImageView View = VK_NULL_HANDLE; + std::string Name; + TextureCreateDesc Desc; + + VulkanTexture(VkDevice Dev, VkImage Image, VkDeviceMemory Memory, + llvm::StringRef Name, TextureCreateDesc Desc) + : Dev(Dev), Image(Image), Memory(Memory), Name(Name), Desc(Desc) {} + + ~VulkanTexture() override { + if (View) + vkDestroyImageView(Dev, View, nullptr); + vkDestroyImage(Dev, Image, nullptr); + vkFreeMemory(Dev, Memory, nullptr); + } +}; + class VulkanQueue : public offloadtest::Queue { public: VkQueue Queue = VK_NULL_HANDLE; @@ -489,9 +516,9 @@ class VulkanDevice : public offloadtest::Device { // FrameBuffer associated data for offscreen rendering. 
VkFramebuffer FrameBuffer = VK_NULL_HANDLE; - ResourceBundle FrameBufferResource = {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0, - nullptr}; - ImageRef DepthStencil = {0, 0, 0}; + std::shared_ptr RenderTarget; + std::shared_ptr RTReadback; + std::shared_ptr DepthStencil; std::optional VertexBuffer = std::nullopt; VkRenderPass RenderPass = VK_NULL_HANDLE; @@ -652,21 +679,6 @@ class VulkanDevice : public offloadtest::Device { llvm::Expected> createBuffer(std::string Name, BufferCreateDesc &Desc, size_t SizeInBytes) override { - VkMemoryPropertyFlags MemFlags = 0; - switch (Desc.Location) { - case MemoryLocation::GpuOnly: - MemFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case MemoryLocation::CpuToGpu: - MemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - break; - case MemoryLocation::GpuToCpu: - MemFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - break; - } - VkBufferCreateInfo BufInfo = {}; BufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; BufInfo.size = SizeInBytes; @@ -686,8 +698,8 @@ class VulkanDevice : public offloadtest::Device { VkMemoryAllocateInfo AllocInfo = {}; AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; AllocInfo.allocationSize = MemReqs.size; - auto MemIdx = - getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, MemFlags); + auto MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, + getVulkanMemoryFlags(Desc.Location)); if (!MemIdx) return MemIdx.takeError(); AllocInfo.memoryTypeIndex = *MemIdx; @@ -704,6 +716,89 @@ class VulkanDevice : public offloadtest::Device { Name, Desc, SizeInBytes); } + llvm::Expected> + createTexture(std::string Name, TextureCreateDesc &Desc) override { + if (auto Err = validateTextureCreateDesc(Desc)) + return Err; + + VkImageCreateInfo ImageInfo = {}; + ImageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + ImageInfo.imageType = VK_IMAGE_TYPE_2D; + ImageInfo.format = getVulkanFormat(Desc.Format); + ImageInfo.extent 
= {Desc.Width, Desc.Height, 1}; + ImageInfo.mipLevels = Desc.MipLevels; + ImageInfo.arrayLayers = 1; + ImageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + ImageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + ImageInfo.usage = getVulkanImageUsage(Desc.Usage); + ImageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + ImageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImage Image; + if (vkCreateImage(Device, &ImageInfo, nullptr, &Image)) + return llvm::createStringError(std::errc::io_error, + "Failed to create image."); + + VkMemoryRequirements MemReqs; + vkGetImageMemoryRequirements(Device, Image, &MemReqs); + + VkMemoryAllocateInfo AllocInfo = {}; + AllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + AllocInfo.allocationSize = MemReqs.size; + auto MemIdx = getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, + getVulkanMemoryFlags(Desc.Location)); + if (!MemIdx) { + vkDestroyImage(Device, Image, nullptr); + return MemIdx.takeError(); + } + AllocInfo.memoryTypeIndex = *MemIdx; + + VkDeviceMemory DeviceMemory; + if (vkAllocateMemory(Device, &AllocInfo, nullptr, &DeviceMemory)) { + vkDestroyImage(Device, Image, nullptr); + return llvm::createStringError(std::errc::not_enough_memory, + "Failed to allocate image memory."); + } + if (vkBindImageMemory(Device, Image, DeviceMemory, 0)) { + vkDestroyImage(Device, Image, nullptr); + vkFreeMemory(Device, DeviceMemory, nullptr); + return llvm::createStringError(std::errc::io_error, + "Failed to bind image memory."); + } + + auto Tex = std::make_shared(Device, Image, DeviceMemory, + Name, Desc); + + const bool IsRT = (Desc.Usage & TextureUsage::RenderTarget) != 0; + const bool IsDS = (Desc.Usage & TextureUsage::DepthStencil) != 0; + if (IsRT || IsDS) { + VkImageViewCreateInfo ViewCi = {}; + ViewCi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + ViewCi.viewType = VK_IMAGE_VIEW_TYPE_2D; + ViewCi.format = getVulkanFormat(Desc.Format); + ViewCi.subresourceRange.baseMipLevel = 0; + ViewCi.subresourceRange.levelCount = 1; + 
ViewCi.subresourceRange.baseArrayLayer = 0; + ViewCi.subresourceRange.layerCount = 1; + ViewCi.image = Image; + if (IsRT) { + ViewCi.components = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, + VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A}; + ViewCi.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } else { + ViewCi.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + } + if (vkCreateImageView(Device, &ViewCi, nullptr, &Tex->View)) { + // Tex destructor will clean up Image + Memory. + return llvm::createStringError(std::errc::device_or_resource_busy, + "Failed to create image view."); + } + } + + return Tex; + } + const Capabilities &getCapabilities() override { if (Caps.empty()) queryCapabilities(); @@ -1035,47 +1130,37 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { - // Create an optimal image used as the depth stencil attachment - VkImageCreateInfo ImageCi = {}; - ImageCi.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - ImageCi.imageType = getVKImageType(ResourceKind::Texture2D); - ImageCi.format = VK_FORMAT_D32_SFLOAT_S8_UINT; - // Use example's height and width - ImageCi.extent = { - static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Width), - static_cast(P.Bindings.RTargetBufferPtr->OutputProps.Height), - 1}; - ImageCi.mipLevels = 1; - ImageCi.arrayLayers = 1; - ImageCi.samples = VK_SAMPLE_COUNT_1_BIT; - ImageCi.tiling = VK_IMAGE_TILING_OPTIMAL; - ImageCi.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - ImageCi.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - if (vkCreateImage(Device, &ImageCi, nullptr, &IS.DepthStencil.Image)) - return llvm::createStringError(std::errc::device_or_resource_busy, - "Depth stencil creation failed."); + llvm::Error createRenderTarget(Pipeline &P, InvocationState &IS) { + if (!P.Bindings.RTargetBufferPtr) + return llvm::createStringError( + std::errc::invalid_argument, + 
"No render target bound for graphics pipeline."); + const CPUBuffer &RTBuf = *P.Bindings.RTargetBufferPtr; - // Allocate memory for the image (device local) and bind it to our image - VkMemoryAllocateInfo MemAlloc{}; - MemAlloc.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - VkMemoryRequirements MemReqs; - vkGetImageMemoryRequirements(Device, IS.DepthStencil.Image, &MemReqs); - MemAlloc.allocationSize = MemReqs.size; - llvm::Expected MemIdx = - getMemoryIndex(PhysicalDevice, MemReqs.memoryTypeBits, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (!MemIdx) - return MemIdx.takeError(); + auto TexOrErr = offloadtest::createRenderTargetFromCPUBuffer(*this, RTBuf); + if (!TexOrErr) + return TexOrErr.takeError(); - MemAlloc.memoryTypeIndex = *MemIdx; - if (vkAllocateMemory(Device, &MemAlloc, nullptr, &IS.DepthStencil.Memory)) - return llvm::createStringError(std::errc::not_enough_memory, - "Depth stencil memory allocation failed."); - if (vkBindImageMemory(Device, IS.DepthStencil.Image, IS.DepthStencil.Memory, - 0)) - return llvm::createStringError(std::errc::not_enough_memory, - "Depth stencil memory binding failed."); + IS.RenderTarget = std::static_pointer_cast(*TexOrErr); + + // Create a host-visible staging buffer for readback. 
+ BufferCreateDesc BufDesc = {}; + BufDesc.Location = MemoryLocation::GpuToCpu; + auto BufOrErr = createBuffer("RTReadback", BufDesc, RTBuf.size()); + if (!BufOrErr) + return BufOrErr.takeError(); + IS.RTReadback = std::static_pointer_cast(*BufOrErr); + + return llvm::Error::success(); + } + + llvm::Error createDepthStencil(Pipeline &P, InvocationState &IS) { + auto TexOrErr = offloadtest::createDefaultDepthStencilTarget( + *this, P.Bindings.RTargetBufferPtr->OutputProps.Width, + P.Bindings.RTargetBufferPtr->OutputProps.Height); + if (!TexOrErr) + return TexOrErr.takeError(); + IS.DepthStencil = std::static_pointer_cast(*TexOrErr); return llvm::Error::success(); } @@ -1088,36 +1173,10 @@ class VulkanDevice : public offloadtest::Device { } if (P.isGraphics()) { - if (!P.Bindings.RTargetBufferPtr) - return llvm::createStringError( - std::errc::invalid_argument, - "No RenderTarget buffer specified for graphics pipeline."); - Resource FrameBuffer = {ResourceKind::Texture2D, - "RenderTarget", - {}, - {}, - P.Bindings.RTargetBufferPtr, - nullptr, - false, - std::nullopt, - false}; - IS.FrameBufferResource.Size = P.Bindings.RTargetBufferPtr->size(); - IS.FrameBufferResource.BufferPtr = P.Bindings.RTargetBufferPtr; - IS.FrameBufferResource.ImageLayout = - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - auto ExHostBuf = createBuffer( - VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, FrameBuffer.size(), - FrameBuffer.BufferPtr->Data[0].get()); - if (!ExHostBuf) - return ExHostBuf.takeError(); - auto ExImageRef = createImage(FrameBuffer, *ExHostBuf, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT | - VK_IMAGE_USAGE_TRANSFER_SRC_BIT); - if (!ExImageRef) - return ExImageRef.takeError(); - IS.FrameBufferResource.ResourceRefs.push_back(*ExImageRef); + if (auto Err = createRenderTarget(P, IS)) + return Err; + // TODO: Always created for graphics pipelines. 
Consider making this + // conditional on the pipeline definition. if (auto Err = createDepthStencil(P, IS)) return Err; @@ -1485,11 +1544,10 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error createRenderPass(Pipeline &P, InvocationState &IS) { + llvm::Error createRenderPass(InvocationState &IS) { std::array Attachments = {}; - Attachments[0].format = getVKFormat(P.Bindings.RTargetBufferPtr->Format, - P.Bindings.RTargetBufferPtr->Channels); + Attachments[0].format = getVulkanFormat(IS.RenderTarget->Desc.Format); Attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; Attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; Attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; @@ -1498,7 +1556,7 @@ class VulkanDevice : public offloadtest::Device { Attachments[0].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; Attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - Attachments[1].format = VK_FORMAT_D32_SFLOAT_S8_UINT; + Attachments[1].format = getVulkanFormat(IS.DepthStencil->Desc.Format); Attachments[1].samples = VK_SAMPLE_COUNT_1_BIT; Attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; Attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; @@ -1568,53 +1626,17 @@ class VulkanDevice : public offloadtest::Device { return llvm::Error::success(); } - llvm::Error createFrameBuffer(Pipeline &P, InvocationState &IS) { - std::array Views = {}; - VkImageViewCreateInfo ViewCreateInfo = {}; - ViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - ViewCreateInfo.viewType = getImageViewType(ResourceKind::Texture2D); - ViewCreateInfo.format = getVKFormat(P.Bindings.RTargetBufferPtr->Format, - P.Bindings.RTargetBufferPtr->Channels); - ViewCreateInfo.components = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, - VK_COMPONENT_SWIZZLE_B, - VK_COMPONENT_SWIZZLE_A}; - ViewCreateInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - ViewCreateInfo.subresourceRange.baseMipLevel = 0; - 
ViewCreateInfo.subresourceRange.baseArrayLayer = 0; - ViewCreateInfo.subresourceRange.layerCount = 1; - ViewCreateInfo.subresourceRange.levelCount = 1; - ViewCreateInfo.image = IS.FrameBufferResource.ResourceRefs[0].Image.Image; - if (vkCreateImageView(Device, &ViewCreateInfo, nullptr, &Views[0])) - return llvm::createStringError( - std::errc::device_or_resource_busy, - "Failed to create frame buffer image view."); - IS.ImageViews.push_back(Views[0]); - - VkImageViewCreateInfo DepthStencilViewCi = {}; - DepthStencilViewCi.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - DepthStencilViewCi.viewType = getImageViewType(ResourceKind::Texture2D); - DepthStencilViewCi.format = VK_FORMAT_D32_SFLOAT_S8_UINT; - DepthStencilViewCi.subresourceRange = {}; - DepthStencilViewCi.subresourceRange.aspectMask = - VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; - DepthStencilViewCi.subresourceRange.baseMipLevel = 0; - DepthStencilViewCi.subresourceRange.levelCount = 1; - DepthStencilViewCi.subresourceRange.baseArrayLayer = 0; - DepthStencilViewCi.subresourceRange.layerCount = 1; - DepthStencilViewCi.image = IS.DepthStencil.Image; - if (vkCreateImageView(Device, &DepthStencilViewCi, nullptr, &Views[1])) - return llvm::createStringError( - std::errc::device_or_resource_busy, - "Failed to create depth stencil image view."); - IS.ImageViews.push_back(Views[1]); + llvm::Error createFrameBuffer(InvocationState &IS) { + std::array Views = {IS.RenderTarget->View, + IS.DepthStencil->View}; VkFramebufferCreateInfo FbufCreateInfo = {}; FbufCreateInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; FbufCreateInfo.renderPass = IS.RenderPass; FbufCreateInfo.attachmentCount = Views.size(); FbufCreateInfo.pAttachments = Views.data(); - FbufCreateInfo.width = P.Bindings.RTargetBufferPtr->OutputProps.Width; - FbufCreateInfo.height = P.Bindings.RTargetBufferPtr->OutputProps.Height; + FbufCreateInfo.width = IS.RenderTarget->Desc.Width; + FbufCreateInfo.height = 
IS.RenderTarget->Desc.Height; FbufCreateInfo.layers = 1; if (vkCreateFramebuffer(Device, &FbufCreateInfo, nullptr, &IS.FrameBuffer)) @@ -1980,6 +2002,63 @@ class VulkanDevice : public offloadtest::Device { } } + // Record commands to copy a texture into a readback buffer. + void copyTextureToReadback(VkCommandBuffer CmdBuffer, + const VulkanTexture &Tex, + const VulkanBuffer &Readback, + VkImageLayout OldLayout, + VkAccessFlags SrcAccessMask, + VkPipelineStageFlags SrcStageMask) { + const VkImageAspectFlags AspectMask = isDepthFormat(Tex.Desc.Format) + ? VK_IMAGE_ASPECT_DEPTH_BIT + : VK_IMAGE_ASPECT_COLOR_BIT; + + // Transition texture to transfer source. + VkImageSubresourceRange SubRange = {}; + SubRange.aspectMask = AspectMask; + SubRange.baseMipLevel = 0; + SubRange.levelCount = 1; + SubRange.layerCount = 1; + + VkImageMemoryBarrier ImageBarrier = {}; + ImageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + ImageBarrier.subresourceRange = SubRange; + ImageBarrier.srcAccessMask = SrcAccessMask; + ImageBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + ImageBarrier.oldLayout = OldLayout; + ImageBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + ImageBarrier.image = Tex.Image; + vkCmdPipelineBarrier(CmdBuffer, SrcStageMask, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &ImageBarrier); + + // Copy image to readback buffer. + VkBufferImageCopy Region = {}; + Region.imageSubresource.aspectMask = AspectMask; + Region.imageSubresource.mipLevel = 0; + Region.imageSubresource.baseArrayLayer = 0; + Region.imageSubresource.layerCount = 1; + Region.imageExtent.width = Tex.Desc.Width; + Region.imageExtent.height = Tex.Desc.Height; + Region.imageExtent.depth = 1; + vkCmdCopyImageToBuffer(CmdBuffer, Tex.Image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + Readback.Buffer, 1, &Region); + + // Barrier to make the readback buffer visible to the host. 
+ VkBufferMemoryBarrier BufBarrier = {}; + BufBarrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + BufBarrier.size = VK_WHOLE_SIZE; + BufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + BufBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT; + BufBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + BufBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + BufBarrier.buffer = Readback.Buffer; + vkCmdPipelineBarrier(CmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, 0, 0, nullptr, 1, + &BufBarrier, 0, nullptr); + } + void copyResourceDataToHost(InvocationState &IS, ResourceBundle &R) { if (!R.isReadWrite()) return; @@ -2103,9 +2182,21 @@ class VulkanDevice : public offloadtest::Device { copyResourceDataToDevice(IS, R); if (P.isGraphics()) { + const auto *ColorCV = + std::get_if(&*IS.RenderTarget->Desc.OptimizedClearValue); + if (!ColorCV) + return llvm::createStringError( + std::errc::invalid_argument, + "Render target clear value must be a ClearColor."); + const auto *DepthCV = std::get_if( + &*IS.DepthStencil->Desc.OptimizedClearValue); + if (!DepthCV) + return llvm::createStringError( + std::errc::invalid_argument, + "Depth/stencil clear value must be a ClearDepthStencil."); VkClearValue ClearValues[2] = {}; - ClearValues[0].color = {{0.0f, 0.0f, 0.0f, 0.0f}}; - ClearValues[1].depthStencil = {1.0f, 0}; + ClearValues[0].color = {{ColorCV->R, ColorCV->G, ColorCV->B, ColorCV->A}}; + ClearValues[1].depthStencil = {DepthCV->Depth, DepthCV->Stencil}; VkRenderPassBeginInfo RenderPassBeginInfo = {}; RenderPassBeginInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; @@ -2172,7 +2263,10 @@ class VulkanDevice : public offloadtest::Device { vkCmdDraw(IS.CmdBuffer, P.Bindings.getVertexCount(), 1, 0, 0); llvm::outs() << "Drew " << P.Bindings.getVertexCount() << " vertices.\n"; vkCmdEndRenderPass(IS.CmdBuffer); - copyResourceDataToHost(IS, IS.FrameBufferResource); + copyTextureToReadback(IS.CmdBuffer, *IS.RenderTarget, *IS.RTReadback, + 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); } for (auto &R : IS.Resources) @@ -2227,17 +2321,15 @@ class VulkanDevice : public offloadtest::Device { Range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; Range.offset = 0; Range.size = VK_WHOLE_SIZE; - const ResourceRef &ResRef = IS.FrameBufferResource.ResourceRefs[0]; + Range.memory = IS.RTReadback->Memory; void *Mapped = nullptr; // NOLINT(misc-const-correctness) - vkMapMemory(Device, ResRef.Host.Memory, 0, VK_WHOLE_SIZE, 0, &Mapped); - - Range.memory = ResRef.Host.Memory; + vkMapMemory(Device, IS.RTReadback->Memory, 0, VK_WHOLE_SIZE, 0, &Mapped); vkInvalidateMappedMemoryRanges(Device, 1, &Range); const CPUBuffer &B = *P.Bindings.RTargetBufferPtr; memcpy(B.Data[0].get(), Mapped, B.size()); - vkUnmapMemory(Device, ResRef.Host.Memory); + vkUnmapMemory(Device, IS.RTReadback->Memory); } return llvm::Error::success(); } @@ -2285,15 +2377,6 @@ class VulkanDevice : public offloadtest::Device { vkDestroyBuffer(Device, IS.VertexBuffer->Host.Buffer, nullptr); vkFreeMemory(Device, IS.VertexBuffer->Host.Memory, nullptr); } - for (auto &ResRef : IS.FrameBufferResource.ResourceRefs) { - // We know the device resource is an image, so no need to check it. 
- vkDestroyImage(Device, ResRef.Image.Image, nullptr); - vkFreeMemory(Device, ResRef.Image.Memory, nullptr); - vkDestroyBuffer(Device, ResRef.Host.Buffer, nullptr); - vkFreeMemory(Device, ResRef.Host.Memory, nullptr); - } - vkDestroyImage(Device, IS.DepthStencil.Image, nullptr); - vkFreeMemory(Device, IS.DepthStencil.Memory, nullptr); vkDestroyFramebuffer(Device, IS.FrameBuffer, nullptr); vkDestroyRenderPass(Device, IS.RenderPass, nullptr); } @@ -2339,10 +2422,10 @@ class VulkanDevice : public offloadtest::Device { if (auto Err = createResources(P, State)) return Err; if (P.isGraphics()) { - if (auto Err = createRenderPass(P, State)) + if (auto Err = createRenderPass(State)) return Err; llvm::outs() << "Render pass created.\n"; - if (auto Err = createFrameBuffer(P, State)) + if (auto Err = createFrameBuffer(State)) return Err; llvm::outs() << "Frame buffer created.\n"; } diff --git a/lib/API/VK/VKResources.h b/lib/API/VK/VKResources.h new file mode 100644 index 000000000..9f39ab24c --- /dev/null +++ b/lib/API/VK/VKResources.h @@ -0,0 +1,93 @@ +//===- VKResources.h - Vulkan Resource Helpers ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#ifndef OFFLOADTEST_API_VKRESOURCES_H +#define OFFLOADTEST_API_VKRESOURCES_H + +#include "API/Device.h" + +#include + +namespace offloadtest { + +inline VkMemoryPropertyFlags getVulkanMemoryFlags(MemoryLocation Location) { + switch (Location) { + case MemoryLocation::GpuOnly: + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + case MemoryLocation::CpuToGpu: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + case MemoryLocation::GpuToCpu: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + llvm_unreachable("All MemoryLocation cases handled"); +} + +inline VkFormat getVulkanFormat(Format Format) { + switch (Format) { + case Format::R16Sint: + return VK_FORMAT_R16_SINT; + case Format::R16Uint: + return VK_FORMAT_R16_UINT; + case Format::RG16Sint: + return VK_FORMAT_R16G16_SINT; + case Format::RG16Uint: + return VK_FORMAT_R16G16_UINT; + case Format::RGBA16Sint: + return VK_FORMAT_R16G16B16A16_SINT; + case Format::RGBA16Uint: + return VK_FORMAT_R16G16B16A16_UINT; + case Format::R32Sint: + return VK_FORMAT_R32_SINT; + case Format::R32Uint: + return VK_FORMAT_R32_UINT; + case Format::R32Float: + return VK_FORMAT_R32_SFLOAT; + case Format::RG32Sint: + return VK_FORMAT_R32G32_SINT; + case Format::RG32Uint: + return VK_FORMAT_R32G32_UINT; + case Format::RG32Float: + return VK_FORMAT_R32G32_SFLOAT; + case Format::RGB32Float: + return VK_FORMAT_R32G32B32_SFLOAT; + case Format::RGBA32Sint: + return VK_FORMAT_R32G32B32A32_SINT; + case Format::RGBA32Uint: + return VK_FORMAT_R32G32B32A32_UINT; + case Format::RGBA32Float: + return VK_FORMAT_R32G32B32A32_SFLOAT; + case Format::D32Float: + return VK_FORMAT_D32_SFLOAT; + case Format::D32FloatS8Uint: + return 
VK_FORMAT_D32_SFLOAT_S8_UINT; + } + llvm_unreachable("All Format cases handled"); +} + +inline VkImageUsageFlags getVulkanImageUsage(TextureUsage Usage) { + VkImageUsageFlags Flags = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + if ((Usage & Sampled) != 0) + Flags |= VK_IMAGE_USAGE_SAMPLED_BIT; + if ((Usage & Storage) != 0) + Flags |= VK_IMAGE_USAGE_STORAGE_BIT; + if ((Usage & RenderTarget) != 0) + Flags |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if ((Usage & DepthStencil) != 0) + Flags |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + return Flags; +} + +} // namespace offloadtest + +#endif // OFFLOADTEST_API_VKRESOURCES_H