Skip to content
Open
Show file tree
Hide file tree
Changes from 41 commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
1148a72
HLSL 6.0 ... 6.9
soufianekhiat Mar 25, 2026
a333169
Missing header
soufianekhiat Mar 26, 2026
8270fda
Fix JIT tests and add support for GPU Textures
soufianekhiat Mar 26, 2026
9c4ddaa
Add tests to validate HLSL 6.x features
soufianekhiat Mar 26, 2026
b880b5c
Fix Clang Tidy.
soufianekhiat Mar 26, 2026
997ccbd
Apply pre-commit auto-fixes
halide-ci[bot] Mar 26, 2026
3ad196f
Add strict_float
soufianekhiat Mar 27, 2026
b650a16
Retrigger GHA
alexreinking Mar 26, 2026
1925554
Apply pre-commit auto-fixes
halide-ci[bot] Mar 26, 2026
1c99661
Appease clang-tidy
alexreinking Mar 27, 2026
2eba11b
Implement strict float intrinsics for D3D12
alexreinking Mar 27, 2026
58223e6
Address PR review: fix DXC codegen bugs
soufianekhiat Mar 27, 2026
2486419
Remove duplicate visit(const Shuffle *op)
soufianekhiat Mar 28, 2026
37f4d92
Fix uint16 cast errors
soufianekhiat Mar 30, 2026
0096e32
Fix CL part 2
soufianekhiat Mar 30, 2026
7f1eb3f
Fix CI Part 3
soufianekhiat Mar 31, 2026
768b61a
CI++
soufianekhiat Mar 31, 2026
7ed2dc6
CI
soufianekhiat Mar 31, 2026
bc85661
shorter name for cross compilation and fix GUID
soufianekhiat Apr 1, 2026
8f17c7d
Fix comment and mismatch naming
soufianekhiat Apr 1, 2026
1820ba3
Too aggressive for replace_all
soufianekhiat Apr 1, 2026
90168c0
Rename + fix warning of implicit cast
soufianekhiat Apr 1, 2026
331f7ce
rename
soufianekhiat Apr 1, 2026
16efab1
Fix for correctness_mul_div_mod
soufianekhiat Apr 1, 2026
463c8c6
skip 64 bits buffer for: correctness_{gpu_mixed_shared_mem_types, mat…
soufianekhiat Apr 1, 2026
fcd4238
Apply pre-commit auto-fixes
halide-ci[bot] Apr 2, 2026
0670b21
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
8c1e04d
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
ed63a89
minor change
soufianekhiat Apr 3, 2026
c3fdaef
Missing header
soufianekhiat Apr 3, 2026
b63ad28
Compilation
soufianekhiat Apr 3, 2026
1d93874
Use manual scan since strrchr is not available in the runtime
soufianekhiat Apr 3, 2026
dfb3d45
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
0bbcfba
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
b075297
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
8566b7c
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
fb2e281
Remove logic and unsigned int
soufianekhiat Apr 3, 2026
3f2f7e4
Merge branch 'sk/hlsl_6' of https://github.com/soufianekhiat/Halide i…
soufianekhiat Apr 3, 2026
5ca6f2a
Guard for OSX
soufianekhiat Apr 3, 2026
7563f63
Apply pre-commit auto-fixes
halide-ci[bot] Apr 3, 2026
ee664f2
Update src/runtime/d3d12compute.cpp
soufianekhiat Apr 3, 2026
da3ed77
Iterations
soufianekhiat Apr 4, 2026
dbbdf5f
Fix generation and compilation
soufianekhiat Apr 5, 2026
3f16a7f
Comment clarifications
soufianekhiat Apr 5, 2026
5561e48
Branch for wrap_buffer
soufianekhiat Apr 9, 2026
c3910b3
Some improvements
soufianekhiat Apr 9, 2026
83bee09
Add u?int64_t and double support
soufianekhiat Apr 9, 2026
8f85256
Apply pre-commit auto-fixes
halide-ci[bot] Apr 14, 2026
4cb42d9
Tidy fixes
soufianekhiat May 2, 2026
ab3f8d4
release COM after error
soufianekhiat May 2, 2026
f82a3b9
Theoretical support for cs_6_10 or cs_10_0
soufianekhiat May 2, 2026
617f7e4
Bulk changes from @slomp suggestions
soufianekhiat May 3, 2026
2ab0294
Use GetCopyableFootprints for texture upload/readback layout
soufianekhiat May 3, 2026
e8f9c0c
Apply pre-commit auto-fixes
halide-ci[bot] May 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions python_bindings/src/halide/halide_/PyEnums.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,16 @@ void define_enums(py::module &m) {
.value("AVX10_1", Target::Feature::AVX10_1)
.value("X86APX", Target::Feature::X86APX)
.value("Simulator", Target::Feature::Simulator)
.value("HLSL_SM60", Target::Feature::HLSL_SM60)
.value("HLSL_SM61", Target::Feature::HLSL_SM61)
.value("HLSL_SM62", Target::Feature::HLSL_SM62)
.value("HLSL_SM63", Target::Feature::HLSL_SM63)
.value("HLSL_SM64", Target::Feature::HLSL_SM64)
.value("HLSL_SM65", Target::Feature::HLSL_SM65)
.value("HLSL_SM66", Target::Feature::HLSL_SM66)
.value("HLSL_SM67", Target::Feature::HLSL_SM67)
.value("HLSL_SM68", Target::Feature::HLSL_SM68)
.value("HLSL_SM69", Target::Feature::HLSL_SM69)
.value("FeatureEnd", Target::Feature::FeatureEnd);

py::enum_<halide_type_code_t>(m, "TypeCode")
Expand Down
658 changes: 578 additions & 80 deletions src/CodeGen_D3D12Compute_Dev.cpp

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion src/DeviceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,11 @@ Expr make_device_interface_call(DeviceAPI device_api, MemoryType memory_type) {
interface_name = "halide_hexagon_dma_device_interface";
break;
case DeviceAPI::D3D12Compute:
interface_name = "halide_d3d12compute_device_interface";
if (memory_type == MemoryType::GPUTexture) {
interface_name = "halide_d3d12compute_image_device_interface";
} else {
interface_name = "halide_d3d12compute_device_interface";
}
break;
case DeviceAPI::Vulkan:
interface_name = "halide_vulkan_device_interface";
Expand Down
135 changes: 130 additions & 5 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,16 @@ const std::map<std::string, Target::Feature> feature_name_map = {
{"trace_realizations", Target::TraceRealizations},
{"trace_pipeline", Target::TracePipeline},
{"d3d12compute", Target::D3D12Compute},
{"hlsl_sm60", Target::HLSL_SM60},
{"hlsl_sm61", Target::HLSL_SM61},
{"hlsl_sm62", Target::HLSL_SM62},
{"hlsl_sm63", Target::HLSL_SM63},
{"hlsl_sm64", Target::HLSL_SM64},
{"hlsl_sm65", Target::HLSL_SM65},
{"hlsl_sm66", Target::HLSL_SM66},
{"hlsl_sm67", Target::HLSL_SM67},
{"hlsl_sm68", Target::HLSL_SM68},
{"hlsl_sm69", Target::HLSL_SM69},
{"strict_float", Target::StrictFloat},
{"tsan", Target::TSAN},
{"asan", Target::ASAN},
Expand Down Expand Up @@ -1135,6 +1145,22 @@ void Target::validate_features() const {
VSX,
});
}

// D3D12Compute SM version features require D3D12Compute to also be set.
if (!has_feature(D3D12Compute)) {
do_check_bad(*this, {
HLSL_SM60,
HLSL_SM61,
HLSL_SM62,
HLSL_SM63,
HLSL_SM64,
HLSL_SM65,
HLSL_SM66,
HLSL_SM67,
HLSL_SM68,
HLSL_SM69,
});
}
}

Target::Target(const std::string &target) {
Expand Down Expand Up @@ -1378,6 +1404,43 @@ int Target::get_vulkan_capability_lower_bound() const {
return 10;
}

int Target::get_d3d12compute_capability_lower_bound() const {
if (!has_feature(Target::D3D12Compute)) {
return -1;
}
if (has_feature(Target::HLSL_SM60)) {
return 60;
}
if (has_feature(Target::HLSL_SM61)) {
return 61;
}
if (has_feature(Target::HLSL_SM62)) {
return 62;
}
if (has_feature(Target::HLSL_SM63)) {
return 63;
}
if (has_feature(Target::HLSL_SM64)) {
return 64;
}
if (has_feature(Target::HLSL_SM65)) {
return 65;
}
if (has_feature(Target::HLSL_SM66)) {
return 66;
}
if (has_feature(Target::HLSL_SM67)) {
return 67;
}
if (has_feature(Target::HLSL_SM68)) {
return 68;
}
if (has_feature(Target::HLSL_SM69)) {
return 69;
}
return 51; // default: SM 5.1 (FXC)
}

int Target::get_arm_v8_lower_bound() const {
if (has_feature(Target::ARMv8a)) {
return 80;
Expand Down Expand Up @@ -1416,13 +1479,13 @@ bool Target::supports_type(const Type &t) const {
if (t.bits() == 64) {
if (t.is_float()) {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(D3D12Compute) || get_d3d12compute_capability_lower_bound() >= 60) &&
(!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
!has_feature(WebGPU));
} else {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(D3D12Compute) || get_d3d12compute_capability_lower_bound() >= 60) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
!has_feature(WebGPU));
}
Expand Down Expand Up @@ -1450,9 +1513,18 @@ bool Target::supports_type(const Type &t, DeviceAPI device) const {
return has_feature(Target::CLDoubles);
}
} else if (device == DeviceAPI::D3D12Compute) {
// Shader Model 5.x can optionally support double-precision; 64-bit int
// types are not supported.
return t.bits() < 64;
// SM 5.1 (FXC): no 64-bit types. float16 and int8 work via widening.
// SM 6.0+: 64-bit int and float (double, int64_t, uint64_t) supported.
// SM 6.2+: native 16-bit float (float16_t) and int (int16_t, uint16_t).
// SM 6.6+: native 8-bit int (int8_t, uint8_t). Earlier SMs widen to int32.
// SM 6.9+: long vectors (5–1024 lanes) via vector<T, N> syntax.
if (t.bits() == 64) {
return get_d3d12compute_capability_lower_bound() >= 60;
}
if (t.lanes() > 4) {
return get_d3d12compute_capability_lower_bound() >= 69;
}
return true;
} else if (device == DeviceAPI::Vulkan) {
if (t.is_float() && t.bits() == 64) {
return has_feature(Target::VulkanFloat64);
Expand Down Expand Up @@ -1653,6 +1725,17 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
VulkanV12,
VulkanV13,

HLSL_SM60,
HLSL_SM61,
HLSL_SM62,
HLSL_SM63,
HLSL_SM64,
HLSL_SM65,
HLSL_SM66,
HLSL_SM67,
HLSL_SM68,
HLSL_SM69,

ARMv8a,
ARMv81a,
ARMv82a,
Expand Down Expand Up @@ -1787,6 +1870,43 @@ bool Target::get_runtime_compatible_target(const Target &other, Target &result)
output.features.reset(VulkanV13);
}

// Pick tight lower bound for D3D12Compute SM version. Use fall-through to clear redundant features
int d3d12_sm_a = get_d3d12compute_capability_lower_bound();
int d3d12_sm_b = other.get_d3d12compute_capability_lower_bound();

// Same trick as CUDA: -1 (unused) becomes large when cast to unsigned, so min gives the true lower bound.
int d3d12_sm = std::min((unsigned)d3d12_sm_a, (unsigned)d3d12_sm_b);
if (d3d12_sm < 60) {
output.features.reset(HLSL_SM60);
}
if (d3d12_sm < 61) {
output.features.reset(HLSL_SM61);
}
if (d3d12_sm < 62) {
output.features.reset(HLSL_SM62);
}
if (d3d12_sm < 63) {
output.features.reset(HLSL_SM63);
}
if (d3d12_sm < 64) {
output.features.reset(HLSL_SM64);
}
if (d3d12_sm < 65) {
output.features.reset(HLSL_SM65);
}
if (d3d12_sm < 66) {
output.features.reset(HLSL_SM66);
}
if (d3d12_sm < 67) {
output.features.reset(HLSL_SM67);
}
if (d3d12_sm < 68) {
output.features.reset(HLSL_SM68);
}
if (d3d12_sm < 69) {
output.features.reset(HLSL_SM69);
}

// Pick tight lower bound for HVX version. Use fall-through to clear redundant features
int hvx_a = get_hvx_lower_bound(*this);
int hvx_b = get_hvx_lower_bound(other);
Expand Down Expand Up @@ -1874,6 +1994,11 @@ void target_test() {
{{"hexagon-32-qurt-hvx_v62", "hexagon-32-qurt", "hexagon-32-qurt"}},
{{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt", ""}},
{{"hexagon-32-qurt-hvx_v62-hvx", "hexagon-32-qurt-hvx", "hexagon-32-qurt-hvx"}},
{{"x86-64-windows-d3d12compute-hlsl_sm66", "x86-64-windows-d3d12compute", "x86-64-windows-d3d12compute"}},
{{"x86-64-windows-d3d12compute-hlsl_sm66", "x86-64-windows-d3d12compute-hlsl_sm60", "x86-64-windows-d3d12compute-hlsl_sm60"}},
{{"x86-64-windows-d3d12compute-hlsl_sm62", "x86-64-windows-d3d12compute-hlsl_sm62", "x86-64-windows-d3d12compute-hlsl_sm62"}},
{{"x86-64-windows-d3d12compute-hlsl_sm69", "x86-64-windows-d3d12compute", "x86-64-windows-d3d12compute"}},
{{"x86-64-windows-d3d12compute-hlsl_sm69", "x86-64-windows-d3d12compute-hlsl_sm60", "x86-64-windows-d3d12compute-hlsl_sm60"}},
};

for (const auto &test : gcd_tests) {
Expand Down
15 changes: 15 additions & 0 deletions src/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,16 @@ struct Target {
AVX10_1 = halide_target_feature_avx10_1,
X86APX = halide_target_feature_x86_apx,
Simulator = halide_target_feature_simulator,
HLSL_SM60 = halide_target_feature_hlsl_sm60,
HLSL_SM61 = halide_target_feature_hlsl_sm61,
HLSL_SM62 = halide_target_feature_hlsl_sm62,
HLSL_SM63 = halide_target_feature_hlsl_sm63,
HLSL_SM64 = halide_target_feature_hlsl_sm64,
HLSL_SM65 = halide_target_feature_hlsl_sm65,
HLSL_SM66 = halide_target_feature_hlsl_sm66,
HLSL_SM67 = halide_target_feature_hlsl_sm67,
HLSL_SM68 = halide_target_feature_hlsl_sm68,
HLSL_SM69 = halide_target_feature_hlsl_sm69,
FeatureEnd = halide_target_feature_end
};
Target() = default;
Expand Down Expand Up @@ -349,6 +359,11 @@ struct Target {
* features are set. */
int get_vulkan_capability_lower_bound() const;

/** Get the minimum D3D12Compute Shader Model version as an integer
* (e.g. 60 for SM 6.0, 62 for SM 6.2). Returns 51 (SM 5.1, FXC path)
* if no SM 6.x features are set, or -1 if D3D12Compute is not enabled. */
int get_d3d12compute_capability_lower_bound() const;

/** Get the minimum ARM v8.x capability found as an integer. Returns
* -1 if no ARM v8.x features are set. */
int get_arm_v8_lower_bound() const;
Expand Down
10 changes: 10 additions & 0 deletions src/runtime/HalideRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -1478,6 +1478,16 @@ typedef enum halide_target_feature_t {
halide_target_feature_avx10_1, ///< Intel AVX10 version 1 support. vector_bits is used to indicate width.
halide_target_feature_x86_apx, ///< Intel x86 APX support. Covers initial set of features released as APX: egpr,push2pop2,ppx,ndd .
halide_target_feature_simulator, ///< Target is for a simulator environment. Currently only applies to iOS.
halide_target_feature_hlsl_sm60, ///< Enable D3D12 Shader Model 6.0 (DXIL, 64-bit types, wave intrinsics). Requires d3d12compute. Uses DXC compiler.
halide_target_feature_hlsl_sm61, ///< Enable D3D12 Shader Model 6.1
halide_target_feature_hlsl_sm62, ///< Enable D3D12 Shader Model 6.2 (native 16-bit scalar types with -enable-16bit-types)
halide_target_feature_hlsl_sm63, ///< Enable D3D12 Shader Model 6.3
halide_target_feature_hlsl_sm64, ///< Enable D3D12 Shader Model 6.4
halide_target_feature_hlsl_sm65, ///< Enable D3D12 Shader Model 6.5
halide_target_feature_hlsl_sm66, ///< Enable D3D12 Shader Model 6.6 (64-bit atomics, packed 8-bit types)
halide_target_feature_hlsl_sm67, ///< Enable D3D12 Shader Model 6.7
halide_target_feature_hlsl_sm68, ///< Enable D3D12 Shader Model 6.8
halide_target_feature_hlsl_sm69, ///< Enable D3D12 Shader Model 6.9 (long vectors 5-1024 lanes, native 16-bit/wave/int64 required)
halide_target_feature_end ///< A sentinel. Every target is considered to have this feature, and setting this feature does nothing.
} halide_target_feature_t;

Expand Down
3 changes: 3 additions & 0 deletions src/runtime/HalideRuntimeD3D12Compute.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
extern "C" {
#endif

#define HALIDE_RUNTIME_D3D12COMPUTE

/** \file
* Routines specific to the Halide Direct3D 12 Compute runtime.
*/

extern const struct halide_device_interface_t *halide_d3d12compute_device_interface();
extern const struct halide_device_interface_t *halide_d3d12compute_image_device_interface();

/** These are forward declared here to allow clients to override the
* Halide Direct3D 12 Compute runtime. Do not call them. */
Expand Down
Loading