From 3c338b2760121570a2f7bae73535a7180c083171 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Wed, 11 Mar 2026 16:56:47 -0700 Subject: [PATCH 01/16] OpenCL: Initial backend --- CMakeLists.txt | 34 + README.md | 5 + main.cpp | 787 +++++++++++--------- spirv_cross_c.cpp | 12 + spirv_cross_c.h | 1780 +++++++++++++++++++++++---------------------- spirv_opencl.cpp | 1067 +++++++++++++++++++++++++++ spirv_opencl.hpp | 125 ++++ test_shaders.py | 133 +++- 8 files changed, 2726 insertions(+), 1217 deletions(-) create mode 100644 spirv_opencl.cpp create mode 100644 spirv_opencl.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e5129c8b..d9ac3b141 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,6 +45,7 @@ option(SPIRV_CROSS_ENABLE_TESTS "Enable SPIRV-Cross tests." ON) option(SPIRV_CROSS_ENABLE_GLSL "Enable GLSL support." ON) option(SPIRV_CROSS_ENABLE_HLSL "Enable HLSL target support." ON) option(SPIRV_CROSS_ENABLE_MSL "Enable MSL target support." ON) +option(SPIRV_CROSS_ENABLE_OPENCL "Enable OpenCL target support." ON) option(SPIRV_CROSS_ENABLE_CPP "Enable C++ target support." ON) option(SPIRV_CROSS_ENABLE_REFLECT "Enable JSON reflection target support." ON) option(SPIRV_CROSS_ENABLE_C_API "Enable C API wrapper support in static library." 
ON) @@ -242,6 +243,10 @@ set(spirv-cross-msl-sources ${CMAKE_CURRENT_SOURCE_DIR}/spirv_msl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/spirv_msl.hpp) +set(spirv-cross-opencl-sources + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_opencl.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/spirv_opencl.hpp) + set(spirv-cross-hlsl-sources ${CMAKE_CURRENT_SOURCE_DIR}/spirv_hlsl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/spirv_hlsl.hpp) @@ -306,6 +311,16 @@ if (SPIRV_CROSS_STATIC) endif() endif() + if (SPIRV_CROSS_ENABLE_OPENCL) + spirv_cross_add_library(spirv-cross-opencl spirv_cross_opencl STATIC + ${spirv-cross-opencl-sources}) + if (SPIRV_CROSS_ENABLE_GLSL) + target_link_libraries(spirv-cross-opencl PRIVATE spirv-cross-glsl) + else() + message(FATAL_ERROR "Must enable GLSL support to enable OpenCL support.") + endif() + endif() + if (SPIRV_CROSS_ENABLE_HLSL) spirv_cross_add_library(spirv-cross-hlsl spirv_cross_hlsl STATIC ${spirv-cross-hlsl-sources}) @@ -343,6 +358,11 @@ if (SPIRV_CROSS_STATIC) target_compile_definitions(spirv-cross-c PRIVATE SPIRV_CROSS_C_API_MSL=1) endif() + if (SPIRV_CROSS_ENABLE_OPENCL) + target_link_libraries(spirv-cross-c PRIVATE spirv-cross-opencl) + target_compile_definitions(spirv-cross-c PRIVATE SPIRV_CROSS_C_API_OPENCL=1) + endif() + if (SPIRV_CROSS_ENABLE_CPP) target_link_libraries(spirv-cross-c PRIVATE spirv-cross-cpp) target_compile_definitions(spirv-cross-c PRIVATE SPIRV_CROSS_C_API_CPP=1) @@ -393,6 +413,15 @@ if (SPIRV_CROSS_SHARED) target_compile_definitions(spirv-cross-c-shared PRIVATE SPIRV_CROSS_C_API_MSL=1) endif() + if (SPIRV_CROSS_ENABLE_OPENCL) + if (SPIRV_CROSS_ENABLE_GLSL) + target_sources(spirv-cross-c-shared PRIVATE ${spirv-cross-opencl-sources}) + else() + message(FATAL_ERROR "Must enable GLSL support to enable OpenCL support.") + endif() + target_compile_definitions(spirv-cross-c-shared PRIVATE SPIRV_CROSS_C_API_OPENCL=1) + endif() + if (SPIRV_CROSS_ENABLE_CPP) if (SPIRV_CROSS_ENABLE_GLSL) target_sources(spirv-cross-c-shared PRIVATE ${spirv-cross-cpp-sources}) @@ -439,6 
+468,10 @@ if (SPIRV_CROSS_CLI) message(FATAL_ERROR "Must enable MSL if building CLI.") endif() + if (NOT SPIRV_CROSS_ENABLE_OPENCL) + message(FATAL_ERROR "Must enable OpenCL if building CLI.") + endif() + if (NOT SPIRV_CROSS_ENABLE_CPP) message(FATAL_ERROR "Must enable C++ if building CLI.") endif() @@ -468,6 +501,7 @@ if (SPIRV_CROSS_CLI) spirv-cross-cpp spirv-cross-reflect spirv-cross-msl + spirv-cross-opencl spirv-cross-util spirv-cross-core) diff --git a/README.md b/README.md index a1aa5511d..9030cd2a7 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ SPIRV-Cross is a tool designed for parsing and converting SPIR-V to other shader - Convert SPIR-V to readable, usable and efficient GLSL - Convert SPIR-V to readable, usable and efficient Metal Shading Language (MSL) - Convert SPIR-V to readable, usable and efficient HLSL + - Convert SPIR-V to readable, usable and efficient OpenCL - Convert SPIR-V to a JSON reflection format - Convert SPIR-V to debuggable C++ [DEPRECATED] - Reflection API to simplify the creation of Vulkan pipeline layouts @@ -546,6 +547,10 @@ To test the roundtrip path GLSL -> SPIR-V -> MSL, `--msl` can be added, e.g. `./ To test the roundtrip path GLSL -> SPIR-V -> HLSL, `--hlsl` can be added, e.g. `./test_shaders.py --hlsl shaders-hlsl`. +### OpenCL backend + +To test the roundtrip path GLSL -> SPIR-V -> OpenCL, `--opencl` can be added, e.g. `./test_shaders.py --opencl shaders-opencl`. + ### Updating regression tests When legitimate changes are found, use `--update` flag to update regression files. 
diff --git a/main.cpp b/main.cpp index 7dc5404dd..adcfccbdd 100644 --- a/main.cpp +++ b/main.cpp @@ -26,6 +26,7 @@ #include "spirv_glsl.hpp" #include "spirv_hlsl.hpp" #include "spirv_msl.hpp" +#include "spirv_opencl.hpp" #include "spirv_parser.hpp" #include "spirv_reflect.hpp" #include @@ -39,8 +40,8 @@ #include #ifdef _WIN32 -#include #include +#include #endif #ifdef HAVE_SPIRV_CROSS_GIT_VERSION @@ -297,10 +298,17 @@ static void print_resources(const Compiler &compiler, StorageClass storage, auto &type = compiler.get_type(res.value_type_id); switch (type.basetype) { - case SPIRType::Float: basetype = "float"; break; - case SPIRType::Int: basetype = "int"; break; - case SPIRType::UInt: basetype = "uint"; break; - default: break; + case SPIRType::Float: + basetype = "float"; + break; + case SPIRType::Int: + basetype = "int"; + break; + case SPIRType::UInt: + basetype = "uint"; + break; + default: + break; } uint32_t array_size = 0; @@ -326,16 +334,30 @@ static void print_resources(const Compiler &compiler, StorageClass storage, string builtin_str; switch (res.builtin) { - case BuiltInPosition: builtin_str = "Position"; break; - case BuiltInPointSize: builtin_str = "PointSize"; break; - case BuiltInCullDistance: builtin_str = "CullDistance"; break; - case BuiltInClipDistance: builtin_str = "ClipDistance"; break; - case BuiltInTessLevelInner: builtin_str = "TessLevelInner"; break; - case BuiltInTessLevelOuter: builtin_str = "TessLevelOuter"; break; - default: builtin_str = string("builtin #") + to_string(res.builtin); + case BuiltInPosition: + builtin_str = "Position"; + break; + case BuiltInPointSize: + builtin_str = "PointSize"; + break; + case BuiltInCullDistance: + builtin_str = "CullDistance"; + break; + case BuiltInClipDistance: + builtin_str = "ClipDistance"; + break; + case BuiltInTessLevelInner: + builtin_str = "TessLevelInner"; + break; + case BuiltInTessLevelOuter: + builtin_str = "TessLevelOuter"; + break; + default: + builtin_str = string("builtin #") + 
to_string(res.builtin); } - fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), active ? "yes" : "no"); + fprintf(stderr, "Builtin %s (%s) (active: %s).\n", builtin_str.c_str(), type_str.c_str(), + active ? "yes" : "no"); } fprintf(stderr, "=============\n\n"); } @@ -465,63 +487,65 @@ static void print_resources(const Compiler &compiler, const ShaderResources &res fprintf(stderr, "\n"); fprintf(stderr, "Execution modes:\n"); - modes.for_each_bit([&](uint32_t i) { - auto mode = static_cast(i); - uint32_t arg0 = compiler.get_execution_mode_argument(mode, 0); - uint32_t arg1 = compiler.get_execution_mode_argument(mode, 1); - uint32_t arg2 = compiler.get_execution_mode_argument(mode, 2); - - switch (static_cast(i)) - { - case ExecutionModeInvocations: - fprintf(stderr, " Invocations: %u\n", arg0); - break; - - case ExecutionModeLocalSize: - fprintf(stderr, " LocalSize: (%u, %u, %u)\n", arg0, arg1, arg2); - break; - - case ExecutionModeOutputVertices: - fprintf(stderr, " OutputVertices: %u\n", arg0); - break; + modes.for_each_bit( + [&](uint32_t i) + { + auto mode = static_cast(i); + uint32_t arg0 = compiler.get_execution_mode_argument(mode, 0); + uint32_t arg1 = compiler.get_execution_mode_argument(mode, 1); + uint32_t arg2 = compiler.get_execution_mode_argument(mode, 2); + + switch (static_cast(i)) + { + case ExecutionModeInvocations: + fprintf(stderr, " Invocations: %u\n", arg0); + break; + + case ExecutionModeLocalSize: + fprintf(stderr, " LocalSize: (%u, %u, %u)\n", arg0, arg1, arg2); + break; + + case ExecutionModeOutputVertices: + fprintf(stderr, " OutputVertices: %u\n", arg0); + break; #define CHECK_MODE(m) \ case ExecutionMode##m: \ fprintf(stderr, " %s\n", #m); \ break - CHECK_MODE(SpacingEqual); - CHECK_MODE(SpacingFractionalEven); - CHECK_MODE(SpacingFractionalOdd); - CHECK_MODE(VertexOrderCw); - CHECK_MODE(VertexOrderCcw); - CHECK_MODE(PixelCenterInteger); - CHECK_MODE(OriginUpperLeft); - CHECK_MODE(OriginLowerLeft); - 
CHECK_MODE(EarlyFragmentTests); - CHECK_MODE(PointMode); - CHECK_MODE(Xfb); - CHECK_MODE(DepthReplacing); - CHECK_MODE(DepthGreater); - CHECK_MODE(DepthLess); - CHECK_MODE(DepthUnchanged); - CHECK_MODE(LocalSizeHint); - CHECK_MODE(InputPoints); - CHECK_MODE(InputLines); - CHECK_MODE(InputLinesAdjacency); - CHECK_MODE(Triangles); - CHECK_MODE(InputTrianglesAdjacency); - CHECK_MODE(Quads); - CHECK_MODE(Isolines); - CHECK_MODE(OutputPoints); - CHECK_MODE(OutputLineStrip); - CHECK_MODE(OutputTriangleStrip); - CHECK_MODE(VecTypeHint); - CHECK_MODE(ContractionOff); - - default: - break; - } - }); + CHECK_MODE(SpacingEqual); + CHECK_MODE(SpacingFractionalEven); + CHECK_MODE(SpacingFractionalOdd); + CHECK_MODE(VertexOrderCw); + CHECK_MODE(VertexOrderCcw); + CHECK_MODE(PixelCenterInteger); + CHECK_MODE(OriginUpperLeft); + CHECK_MODE(OriginLowerLeft); + CHECK_MODE(EarlyFragmentTests); + CHECK_MODE(PointMode); + CHECK_MODE(Xfb); + CHECK_MODE(DepthReplacing); + CHECK_MODE(DepthGreater); + CHECK_MODE(DepthLess); + CHECK_MODE(DepthUnchanged); + CHECK_MODE(LocalSizeHint); + CHECK_MODE(InputPoints); + CHECK_MODE(InputLines); + CHECK_MODE(InputLinesAdjacency); + CHECK_MODE(Triangles); + CHECK_MODE(InputTrianglesAdjacency); + CHECK_MODE(Quads); + CHECK_MODE(Isolines); + CHECK_MODE(OutputPoints); + CHECK_MODE(OutputLineStrip); + CHECK_MODE(OutputTriangleStrip); + CHECK_MODE(VecTypeHint); + CHECK_MODE(ContractionOff); + + default: + break; + } + }); fprintf(stderr, "\n"); print_resources(compiler, "subpass inputs", res.subpass_inputs); @@ -646,7 +670,7 @@ struct CLIArguments bool msl_pad_fragment_output = false; bool msl_domain_lower_left = false; bool msl_argument_buffers = false; - uint32_t msl_argument_buffers_tier = 0; // Tier 1 + uint32_t msl_argument_buffers_tier = 0; // Tier 1 bool msl_texture_buffer_native = false; bool msl_framebuffer_fetch = false; bool msl_invariant_float_math = false; @@ -751,6 +775,11 @@ struct CLIArguments bool use_420pack_extension = true; bool 
remove_unused = false; bool combined_samplers_inherit_bindings = false; + + bool opencl = false; + uint32_t opencl_version = 120; + bool opencl_enable_fp64 = false; + bool opencl_enable_64bit_atomics = false; }; static void print_version() @@ -770,6 +799,7 @@ static void print_help_backend() "\t[--vulkan-semantics] or [-V]:\n\t\tEmit Vulkan GLSL instead of plain GLSL. Makes use of Vulkan-only features to match SPIR-V.\n" "\t[--msl]:\n\t\tEmit Metal Shading Language (MSL).\n" "\t[--hlsl]:\n\t\tEmit HLSL.\n" + "\t[--opencl]:\n\t\tEmit OpenCL C (compute shaders only).\n" "\t[--reflect]:\n\t\tEmit JSON reflection.\n" "\t[--cpp]:\n\t\tDEPRECATED. Emits C++ code.\n" ); @@ -1267,7 +1297,8 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_opts.pad_fragment_output_components = args.msl_pad_fragment_output; msl_opts.tess_domain_origin_lower_left = args.msl_domain_lower_left; msl_opts.argument_buffers = args.msl_argument_buffers; - msl_opts.argument_buffers_tier = static_cast(args.msl_argument_buffers_tier); + msl_opts.argument_buffers_tier = + static_cast(args.msl_argument_buffers_tier); msl_opts.texture_buffer_native = args.msl_texture_buffer_native; msl_opts.multiview = args.msl_multiview; msl_opts.multiview_layered_rendering = args.msl_multiview_layered_rendering; @@ -1323,6 +1354,15 @@ static string compile_iteration(const CLIArguments &args, std::vector if (args.msl_combined_sampler_suffix) msl_comp->set_combined_sampler_suffix(args.msl_combined_sampler_suffix); } + else if (args.opencl) + { + compiler.reset(new CompilerOpenCL(std::move(spirv_parser.get_parsed_ir()))); + auto *ocl_comp = static_cast(compiler.get()); + CompilerOpenCL::Options ocl_opts = ocl_comp->get_opencl_options(); + ocl_opts.opencl_version = args.opencl_version; + ocl_opts.enable_fp64 = args.opencl_enable_fp64; + ocl_comp->set_opencl_options(ocl_opts); + } else if (args.hlsl) compiler.reset(new CompilerHLSL(std::move(spirv_parser.get_parsed_ir()))); else @@ -1335,7 +1375,8 
@@ static string compile_iteration(const CLIArguments &args, std::vector if (!args.variable_type_remaps.empty()) { - auto remap_cb = [&](const SPIRType &, const string &name, string &out) -> void { + auto remap_cb = [&](const SPIRType &, const string &name, string &out) -> void + { for (const VariableTypeRemap &remap : args.variable_type_remaps) if (name == remap.variable_name) out = remap.new_variable_type; @@ -1599,9 +1640,8 @@ static string compile_iteration(const CLIArguments &args, std::vector for (auto &named_remap : args.hlsl_attr_remap_named) { - auto itr = std::find_if(res.stage_inputs.begin(), res.stage_inputs.end(), [&](const Resource &input_res) { - return input_res.name == named_remap.name; - }); + auto itr = std::find_if(res.stage_inputs.begin(), res.stage_inputs.end(), + [&](const Resource &input_res) { return input_res.name == named_remap.name; }); if (itr != res.stage_inputs.end()) { @@ -1633,55 +1673,79 @@ static int main_inner(int argc, char *argv[]) CLIArguments args; CLICallbacks cbs; - cbs.add("--help", [](CLIParser &parser) { - print_help(); - parser.end(); - }); - cbs.add("--help-all", [](CLIParser &parser) { - print_help_all(); - parser.end(); - }); - cbs.add("--help-backend", [](CLIParser &parser) { - print_help_backend(); - parser.end(); - }); - cbs.add("--help-common", [](CLIParser &parser) { - print_help_common(); - parser.end(); - }); - cbs.add("--help-glsl", [](CLIParser &parser) { - print_help_glsl(); - parser.end(); - }); - cbs.add("--help-msl", [](CLIParser &parser) { - print_help_msl(); - parser.end(); - }); - cbs.add("--help-hlsl", [](CLIParser &parser) { - print_help_hlsl(); - parser.end(); - }); - cbs.add("--help-obscure", [](CLIParser &parser) { - print_help_obscure(); - parser.end(); - }); - cbs.add("--revision", [](CLIParser &parser) { - print_version(); - parser.end(); - }); + cbs.add("--help", + [](CLIParser &parser) + { + print_help(); + parser.end(); + }); + cbs.add("--help-all", + [](CLIParser &parser) + { + 
print_help_all(); + parser.end(); + }); + cbs.add("--help-backend", + [](CLIParser &parser) + { + print_help_backend(); + parser.end(); + }); + cbs.add("--help-common", + [](CLIParser &parser) + { + print_help_common(); + parser.end(); + }); + cbs.add("--help-glsl", + [](CLIParser &parser) + { + print_help_glsl(); + parser.end(); + }); + cbs.add("--help-msl", + [](CLIParser &parser) + { + print_help_msl(); + parser.end(); + }); + cbs.add("--help-hlsl", + [](CLIParser &parser) + { + print_help_hlsl(); + parser.end(); + }); + cbs.add("--help-obscure", + [](CLIParser &parser) + { + print_help_obscure(); + parser.end(); + }); + cbs.add("--revision", + [](CLIParser &parser) + { + print_version(); + parser.end(); + }); cbs.add("--output", [&args](CLIParser &parser) { args.output = parser.next_string(); }); - cbs.add("--es", [&args](CLIParser &) { - args.es = true; - args.set_es = true; - }); - cbs.add("--no-es", [&args](CLIParser &) { - args.es = false; - args.set_es = true; - }); - cbs.add("--version", [&args](CLIParser &parser) { - args.version = parser.next_uint(); - args.set_version = true; - }); + cbs.add("--es", + [&args](CLIParser &) + { + args.es = true; + args.set_es = true; + }); + cbs.add("--no-es", + [&args](CLIParser &) + { + args.es = false; + args.set_es = true; + }); + cbs.add("--version", + [&args](CLIParser &parser) + { + args.version = parser.next_uint(); + args.set_version = true; + }); cbs.add("--dump-resources", [&args](CLIParser &) { args.dump_resources = true; }); cbs.add("--force-temporary", [&args](CLIParser &) { args.force_temporary = true; }); cbs.add("--flatten-ubo", [&args](CLIParser &) { args.flatten_ubo = true; }); @@ -1695,15 +1759,17 @@ static int main_inner(int argc, char *argv[]) cbs.add("--glsl-emit-push-constant-as-ubo", [&args](CLIParser &) { args.glsl_emit_push_constant_as_ubo = true; }); cbs.add("--glsl-emit-ubo-as-plain-uniforms", [&args](CLIParser &) { args.glsl_emit_ubo_as_plain_uniforms = true; }); 
cbs.add("--glsl-force-flattened-io-blocks", [&args](CLIParser &) { args.glsl_force_flattened_io_blocks = true; }); - cbs.add("--glsl-ovr-multiview-view-count", [&args](CLIParser &parser) { args.glsl_ovr_multiview_view_count = parser.next_uint(); }); - cbs.add("--glsl-remap-ext-framebuffer-fetch", [&args](CLIParser &parser) { - uint32_t input_index = parser.next_uint(); - uint32_t color_attachment = parser.next_uint(); - args.glsl_ext_framebuffer_fetch.push_back({ input_index, color_attachment }); - }); - cbs.add("--glsl-ext-framebuffer-fetch-noncoherent", [&args](CLIParser &) { - args.glsl_ext_framebuffer_fetch_noncoherent = true; - }); + cbs.add("--glsl-ovr-multiview-view-count", + [&args](CLIParser &parser) { args.glsl_ovr_multiview_view_count = parser.next_uint(); }); + cbs.add("--glsl-remap-ext-framebuffer-fetch", + [&args](CLIParser &parser) + { + uint32_t input_index = parser.next_uint(); + uint32_t color_attachment = parser.next_uint(); + args.glsl_ext_framebuffer_fetch.push_back({ input_index, color_attachment }); + }); + cbs.add("--glsl-ext-framebuffer-fetch-noncoherent", + [&args](CLIParser &) { args.glsl_ext_framebuffer_fetch_noncoherent = true; }); cbs.add("--vulkan-glsl-disable-ext-samplerless-texture-functions", [&args](CLIParser &) { args.vulkan_glsl_disable_ext_samplerless_texture_functions = true; }); cbs.add("--disable-storage-image-qualifier-deduction", @@ -1715,14 +1781,15 @@ static int main_inner(int argc, char *argv[]) cbs.add("--hlsl-enable-compat", [&args](CLIParser &) { args.hlsl_compat = true; }); cbs.add("--hlsl-support-nonzero-basevertex-baseinstance", [&args](CLIParser &) { args.hlsl_support_nonzero_base = true; }); - cbs.add("--hlsl-basevertex-baseinstance-binding", [&args](CLIParser &parser) { - args.hlsl_base_vertex_index_explicit_binding = true; - args.hlsl_base_vertex_index_register_index = parser.next_uint(); - args.hlsl_base_vertex_index_register_space = parser.next_uint(); - }); - cbs.add("--hlsl-auto-binding", [&args](CLIParser 
&parser) { - args.hlsl_binding_flags |= hlsl_resource_type_to_flag(parser.next_string()); - }); + cbs.add("--hlsl-basevertex-baseinstance-binding", + [&args](CLIParser &parser) + { + args.hlsl_base_vertex_index_explicit_binding = true; + args.hlsl_base_vertex_index_register_index = parser.next_uint(); + args.hlsl_base_vertex_index_register_space = parser.next_uint(); + }); + cbs.add("--hlsl-auto-binding", [&args](CLIParser &parser) + { args.hlsl_binding_flags |= hlsl_resource_type_to_flag(parser.next_string()); }); cbs.add("--hlsl-force-storage-buffer-as-uav", [&args](CLIParser &) { args.hlsl_force_storage_buffer_as_uav = true; }); cbs.add("--hlsl-nonwritable-uav-texture-as-srv", @@ -1730,7 +1797,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--hlsl-enable-16bit-types", [&args](CLIParser &) { args.hlsl_enable_16bit_types = true; }); cbs.add("--hlsl-flatten-matrix-vertex-input-semantics", [&args](CLIParser &) { args.hlsl_flatten_matrix_vertex_input_semantics = true; }); - cbs.add("--hlsl-preserve-structured-buffers", [&args](CLIParser &) { args.hlsl_preserve_structured_buffers = true; }); + cbs.add("--hlsl-preserve-structured-buffers", + [&args](CLIParser &) { args.hlsl_preserve_structured_buffers = true; }); cbs.add("--hlsl-user-semantic", [&args](CLIParser &) { args.hlsl_user_semantic = true; }); cbs.add("--vulkan-semantics", [&args](CLIParser &) { args.vulkan_semantics = true; }); cbs.add("-V", [&args](CLIParser &) { args.vulkan_semantics = true; }); @@ -1758,23 +1826,27 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-view-index-from-device-index", [&args](CLIParser &) { args.msl_view_index_from_device_index = true; }); cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; }); - cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) { - args.msl_argument_buffers = true; - // Make sure next_uint() is called in-order. 
- uint32_t desc_set = parser.next_uint(); - uint32_t binding = parser.next_uint(); - args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); - }); + cbs.add("--msl-dynamic-buffer", + [&args](CLIParser &parser) + { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. + uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); + }); cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; }); cbs.add("--msl-force-active-argument-buffer-resources", [&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; }); - cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) { - args.msl_argument_buffers = true; - // Make sure next_uint() is called in-order. - uint32_t desc_set = parser.next_uint(); - uint32_t binding = parser.next_uint(); - args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); - }); + cbs.add("--msl-inline-uniform-block", + [&args](CLIParser &parser) + { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. 
+ uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding)); + }); cbs.add("--msl-force-native-arrays", [&args](CLIParser &) { args.msl_force_native_arrays = true; }); cbs.add("--msl-disable-frag-depth-builtin", [&args](CLIParser &) { args.msl_enable_frag_depth_builtin = false; }); cbs.add("--msl-disable-frag-stencil-ref-builtin", @@ -1783,92 +1855,100 @@ static int main_inner(int argc, char *argv[]) [&args](CLIParser &parser) { args.msl_enable_frag_output_mask = parser.next_hex_uint(); }); cbs.add("--msl-no-clip-distance-user-varying", [&args](CLIParser &) { args.msl_enable_clip_distance_user_varying = false; }); - cbs.add("--msl-add-shader-input", [&args](CLIParser &parser) { - MSLShaderInterfaceVariable input; - // Make sure next_uint() is called in-order. - input.location = parser.next_uint(); - const char *format = parser.next_value_string("other"); - if (strcmp(format, "any32") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; - else if (strcmp(format, "any16") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; - else if (strcmp(format, "u16") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; - else if (strcmp(format, "u8") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; - else - input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; - input.vecsize = parser.next_uint(); - const char *rate = parser.next_value_string("vertex"); - if (strcmp(rate, "primitive") == 0) - input.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; - else if (strcmp(rate, "patch") == 0) - input.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; - else - input.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; - args.msl_shader_inputs.push_back(input); - }); - cbs.add("--msl-add-shader-output", [&args](CLIParser &parser) { - MSLShaderInterfaceVariable output; - // Make sure next_uint() is called in-order. 
- output.location = parser.next_uint(); - const char *format = parser.next_value_string("other"); - if (strcmp(format, "any32") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; - else if (strcmp(format, "any16") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; - else if (strcmp(format, "u16") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; - else if (strcmp(format, "u8") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; - else - output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; - output.vecsize = parser.next_uint(); - const char *rate = parser.next_value_string("vertex"); - if (strcmp(rate, "primitive") == 0) - output.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; - else if (strcmp(rate, "patch") == 0) - output.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; - else - output.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; - args.msl_shader_outputs.push_back(output); - }); - cbs.add("--msl-shader-input", [&args](CLIParser &parser) { - MSLShaderInterfaceVariable input; - // Make sure next_uint() is called in-order. - input.location = parser.next_uint(); - const char *format = parser.next_value_string("other"); - if (strcmp(format, "any32") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; - else if (strcmp(format, "any16") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; - else if (strcmp(format, "u16") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; - else if (strcmp(format, "u8") == 0) - input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; - else - input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; - input.vecsize = parser.next_uint(); - args.msl_shader_inputs.push_back(input); - }); - cbs.add("--msl-shader-output", [&args](CLIParser &parser) { - MSLShaderInterfaceVariable output; - // Make sure next_uint() is called in-order. 
- output.location = parser.next_uint(); - const char *format = parser.next_value_string("other"); - if (strcmp(format, "any32") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; - else if (strcmp(format, "any16") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; - else if (strcmp(format, "u16") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; - else if (strcmp(format, "u8") == 0) - output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; - else - output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; - output.vecsize = parser.next_uint(); - args.msl_shader_outputs.push_back(output); - }); + cbs.add("--msl-add-shader-input", + [&args](CLIParser &parser) + { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + input.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + input.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-add-shader-output", + [&args](CLIParser &parser) + { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + const char *rate = parser.next_value_string("vertex"); + if (strcmp(rate, "primitive") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE; + else if (strcmp(rate, "patch") == 0) + output.rate = MSL_SHADER_VARIABLE_RATE_PER_PATCH; + else + output.rate = MSL_SHADER_VARIABLE_RATE_PER_VERTEX; + args.msl_shader_outputs.push_back(output); + }); + cbs.add("--msl-shader-input", + [&args](CLIParser &parser) + { + MSLShaderInterfaceVariable input; + // Make sure next_uint() is called in-order. + input.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + input.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + input.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + input.vecsize = parser.next_uint(); + args.msl_shader_inputs.push_back(input); + }); + cbs.add("--msl-shader-output", + [&args](CLIParser &parser) + { + MSLShaderInterfaceVariable output; + // Make sure next_uint() is called in-order. 
+ output.location = parser.next_uint(); + const char *format = parser.next_value_string("other"); + if (strcmp(format, "any32") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY32; + else if (strcmp(format, "any16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_ANY16; + else if (strcmp(format, "u16") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT16; + else if (strcmp(format, "u8") == 0) + output.format = MSL_SHADER_VARIABLE_FORMAT_UINT8; + else + output.format = MSL_SHADER_VARIABLE_FORMAT_OTHER; + output.vecsize = parser.next_uint(); + args.msl_shader_outputs.push_back(output); + }); cbs.add("--msl-raw-buffer-tese-input", [&args](CLIParser &) { args.msl_raw_buffer_tese_input = true; }); cbs.add("--msl-multi-patch-workgroup", [&args](CLIParser &) { args.msl_multi_patch_workgroup = true; }); cbs.add("--msl-vertex-for-tessellation", [&args](CLIParser &) { args.msl_vertex_for_tessellation = true; }); @@ -1888,92 +1968,118 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-no-manual-helper-invocation-updates", [&args](CLIParser &) { args.msl_manual_helper_invocation_updates = false; }); cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; }); - cbs.add("--msl-force-frag-with-side-effects-execution", [&args](CLIParser &) { args.msl_force_fragment_with_side_effects_execution = true; }); + cbs.add("--msl-force-frag-with-side-effects-execution", + [&args](CLIParser &) { args.msl_force_fragment_with_side_effects_execution = true; }); cbs.add("--msl-sample-dref-lod-array-as-grad", [&args](CLIParser &) { args.msl_sample_dref_lod_array_as_grad = true; }); cbs.add("--msl-no-readwrite-texture-fences", [&args](CLIParser &) { args.msl_readwrite_texture_fences = false; }); cbs.add("--msl-agx-manual-cube-grad-fixup", [&args](CLIParser &) { args.msl_agx_manual_cube_grad_fixup = true; }); - cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) { - args.msl_combined_sampler_suffix = 
parser.next_string(); - }); + cbs.add("--msl-combined-sampler-suffix", + [&args](CLIParser &parser) { args.msl_combined_sampler_suffix = parser.next_string(); }); cbs.add("--msl-runtime-array-rich-descriptor", [&args](CLIParser &) { args.msl_runtime_array_rich_descriptor = true; }); - cbs.add("--msl-replace-recursive-inputs", - [&args](CLIParser &) { args.msl_replace_recursive_inputs = true; }); - cbs.add("--msl-input-attachment-is-ds-attachment", [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; }); + cbs.add("--msl-replace-recursive-inputs", [&args](CLIParser &) { args.msl_replace_recursive_inputs = true; }); + cbs.add("--msl-input-attachment-is-ds-attachment", + [&args](CLIParser &) { args.msl_input_attachment_is_ds_attachment = true; }); cbs.add("--msl-disable-rasterization", [&args](CLIParser &) { args.msl_disable_rasterization = true; }); cbs.add("--msl-auto-disable-rasterization", [&args](CLIParser &) { args.msl_auto_disable_rasterization = true; }); - cbs.add("--msl-default-point-size", [&args](CLIParser &parser) { - args.msl_enable_point_size_default = true; - args.msl_default_point_size = static_cast(parser.next_double()); - }); + cbs.add("--msl-default-point-size", + [&args](CLIParser &parser) + { + args.msl_enable_point_size_default = true; + args.msl_default_point_size = static_cast(parser.next_double()); + }); + cbs.add("--opencl", [&args](CLIParser &) { args.opencl = true; }); + cbs.add("--opencl-version", [&args](CLIParser &parser) { args.opencl_version = parser.next_uint(); }); + cbs.add("--opencl-fp64", [&args](CLIParser &) { args.opencl_enable_fp64 = true; }); + cbs.add("--opencl-64bit-atomics", [&args](CLIParser &) { args.opencl_enable_64bit_atomics = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); - cbs.add("--rename-entry-point", [&args](CLIParser &parser) { - auto old_name = parser.next_string(); - auto new_name = parser.next_string(); - auto model = 
stage_to_execution_model(parser.next_string()); - args.entry_point_rename.push_back({ old_name, new_name, std::move(model) }); - }); + cbs.add("--rename-entry-point", + [&args](CLIParser &parser) + { + auto old_name = parser.next_string(); + auto new_name = parser.next_string(); + auto model = stage_to_execution_model(parser.next_string()); + args.entry_point_rename.push_back({ old_name, new_name, std::move(model) }); + }); cbs.add("--entry", [&args](CLIParser &parser) { args.entry = parser.next_string(); }); cbs.add("--stage", [&args](CLIParser &parser) { args.entry_stage = parser.next_string(); }); cbs.add("--separate-shader-objects", [&args](CLIParser &) { args.sso = true; }); - cbs.add("--set-hlsl-vertex-input-semantic", [&args](CLIParser &parser) { - HLSLVertexAttributeRemap remap; - remap.location = parser.next_uint(); - remap.semantic = parser.next_string(); - args.hlsl_attr_remap.push_back(std::move(remap)); - }); - cbs.add("--set-hlsl-named-vertex-input-semantic", [&args](CLIParser &parser) { - HLSLVertexAttributeRemapNamed remap; - remap.name = parser.next_string(); - remap.semantic = parser.next_string(); - args.hlsl_attr_remap_named.push_back(std::move(remap)); - }); - - cbs.add("--remap", [&args](CLIParser &parser) { - string src = parser.next_string(); - string dst = parser.next_string(); - uint32_t components = parser.next_uint(); - args.remaps.push_back({ std::move(src), std::move(dst), components }); - }); - - cbs.add("--remap-variable-type", [&args](CLIParser &parser) { - string var_name = parser.next_string(); - string new_type = parser.next_string(); - args.variable_type_remaps.push_back({ std::move(var_name), std::move(new_type) }); - }); - - cbs.add("--rename-interface-variable", [&args](CLIParser &parser) { - StorageClass cls = StorageClassMax; - string clsStr = parser.next_string(); - if (clsStr == "in") - cls = StorageClassInput; - else if (clsStr == "out") - cls = StorageClassOutput; - - uint32_t loc = parser.next_uint(); - string var_name 
= parser.next_string(); - args.interface_variable_renames.push_back({ cls, loc, std::move(var_name) }); - }); - - cbs.add("--pls-in", [&args](CLIParser &parser) { - auto fmt = pls_format(parser.next_string()); - auto name = parser.next_string(); - args.pls_in.push_back({ std::move(fmt), std::move(name) }); - }); - cbs.add("--pls-out", [&args](CLIParser &parser) { - auto fmt = pls_format(parser.next_string()); - auto name = parser.next_string(); - args.pls_out.push_back({ std::move(fmt), std::move(name) }); - }); - cbs.add("--shader-model", [&args](CLIParser &parser) { - args.shader_model = parser.next_uint(); - args.set_shader_model = true; - }); - cbs.add("--msl-version", [&args](CLIParser &parser) { - args.msl_version = parser.next_uint(); - args.set_msl_version = true; - }); + cbs.add("--set-hlsl-vertex-input-semantic", + [&args](CLIParser &parser) + { + HLSLVertexAttributeRemap remap; + remap.location = parser.next_uint(); + remap.semantic = parser.next_string(); + args.hlsl_attr_remap.push_back(std::move(remap)); + }); + cbs.add("--set-hlsl-named-vertex-input-semantic", + [&args](CLIParser &parser) + { + HLSLVertexAttributeRemapNamed remap; + remap.name = parser.next_string(); + remap.semantic = parser.next_string(); + args.hlsl_attr_remap_named.push_back(std::move(remap)); + }); + + cbs.add("--remap", + [&args](CLIParser &parser) + { + string src = parser.next_string(); + string dst = parser.next_string(); + uint32_t components = parser.next_uint(); + args.remaps.push_back({ std::move(src), std::move(dst), components }); + }); + + cbs.add("--remap-variable-type", + [&args](CLIParser &parser) + { + string var_name = parser.next_string(); + string new_type = parser.next_string(); + args.variable_type_remaps.push_back({ std::move(var_name), std::move(new_type) }); + }); + + cbs.add("--rename-interface-variable", + [&args](CLIParser &parser) + { + StorageClass cls = StorageClassMax; + string clsStr = parser.next_string(); + if (clsStr == "in") + cls = 
StorageClassInput; + else if (clsStr == "out") + cls = StorageClassOutput; + + uint32_t loc = parser.next_uint(); + string var_name = parser.next_string(); + args.interface_variable_renames.push_back({ cls, loc, std::move(var_name) }); + }); + + cbs.add("--pls-in", + [&args](CLIParser &parser) + { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_in.push_back({ std::move(fmt), std::move(name) }); + }); + cbs.add("--pls-out", + [&args](CLIParser &parser) + { + auto fmt = pls_format(parser.next_string()); + auto name = parser.next_string(); + args.pls_out.push_back({ std::move(fmt), std::move(name) }); + }); + cbs.add("--shader-model", + [&args](CLIParser &parser) + { + args.shader_model = parser.next_uint(); + args.set_shader_model = true; + }); + cbs.add("--msl-version", + [&args](CLIParser &parser) + { + args.msl_version = parser.next_uint(); + args.set_msl_version = true; + }); cbs.add("--remove-unused-variables", [&args](CLIParser &) { args.remove_unused = true; }); cbs.add("--combined-samplers-inherit-bindings", @@ -1982,34 +2088,37 @@ static int main_inner(int argc, char *argv[]) cbs.add("--no-support-nonzero-baseinstance", [&](CLIParser &) { args.support_nonzero_baseinstance = false; }); cbs.add("--emit-line-directives", [&args](CLIParser &) { args.emit_line_directives = true; }); - cbs.add("--mask-stage-output-location", [&](CLIParser &parser) { - uint32_t location = parser.next_uint(); - uint32_t component = parser.next_uint(); - args.masked_stage_outputs.push_back({ location, component }); - }); - - cbs.add("--mask-stage-output-builtin", [&](CLIParser &parser) { - BuiltIn masked_builtin = BuiltInMax; - std::string builtin = parser.next_string(); - if (builtin == "Position") - masked_builtin = BuiltInPosition; - else if (builtin == "PointSize") - masked_builtin = BuiltInPointSize; - else if (builtin == "CullDistance") - masked_builtin = BuiltInCullDistance; - else if (builtin == "ClipDistance") - masked_builtin = 
BuiltInClipDistance; - else - { - print_help(); - exit(EXIT_FAILURE); - } - args.masked_stage_builtins.push_back(masked_builtin); - }); - - cbs.add("--force-recompile-max-debug-iterations", [&](CLIParser &parser) { - args.force_recompile_max_debug_iterations = parser.next_uint(); - }); + cbs.add("--mask-stage-output-location", + [&](CLIParser &parser) + { + uint32_t location = parser.next_uint(); + uint32_t component = parser.next_uint(); + args.masked_stage_outputs.push_back({ location, component }); + }); + + cbs.add("--mask-stage-output-builtin", + [&](CLIParser &parser) + { + BuiltIn masked_builtin = BuiltInMax; + std::string builtin = parser.next_string(); + if (builtin == "Position") + masked_builtin = BuiltInPosition; + else if (builtin == "PointSize") + masked_builtin = BuiltInPointSize; + else if (builtin == "CullDistance") + masked_builtin = BuiltInCullDistance; + else if (builtin == "ClipDistance") + masked_builtin = BuiltInClipDistance; + else + { + print_help(); + exit(EXIT_FAILURE); + } + args.masked_stage_builtins.push_back(masked_builtin); + }); + + cbs.add("--force-recompile-max-debug-iterations", + [&](CLIParser &parser) { args.force_recompile_max_debug_iterations = parser.next_uint(); }); cbs.add("--relax-nan-checks", [&](CLIParser &) { args.relax_nan_checks = true; }); diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 1604385e5..4494700ed 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -40,6 +40,9 @@ #if SPIRV_CROSS_C_API_REFLECT #include "spirv_reflect.hpp" #endif +#if SPIRV_CROSS_C_API_OPENCL +#include "spirv_opencl.hpp" +#endif #ifdef HAVE_SPIRV_CROSS_GIT_VERSION #include "gitversion.h" @@ -339,6 +342,15 @@ spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend back break; #endif +#if SPIRV_CROSS_C_API_OPENCL + case SPVC_BACKEND_OPENCL: + if (mode == SPVC_CAPTURE_MODE_TAKE_OWNERSHIP) + comp->compiler.reset(new CompilerOpenCL(std::move(parsed_ir->parsed))); + else if (mode == SPVC_CAPTURE_MODE_COPY) + 
comp->compiler.reset(new CompilerOpenCL(parsed_ir->parsed)); + break; +#endif + default: context->report_error("Invalid backend."); return SPVC_ERROR_INVALID_ARGUMENT; diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 30f1c459c..76d2b8155 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -24,8 +24,8 @@ #ifndef SPIRV_CROSS_C_API_H #define SPIRV_CROSS_C_API_H -#include <stddef.h> #include "spirv.h" +#include <stddef.h> /* * C89-compatible wrapper for SPIRV-Cross' API. @@ -34,7 +34,8 @@ */ #ifdef __cplusplus -extern "C" { +extern "C" +{ #endif /* Bumped if ABI or API breaks backwards compatibility. */ @@ -59,212 +60,213 @@ extern "C" { #endif #endif -/* + /* * Gets the SPVC_C_API_VERSION_* used to build this library. * Can be used to check for ABI mismatch if so-versioning did not catch it. */ -SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch); - -/* Gets a human readable version string to identify which commit a particular binary was created from. */ -SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void); - -/* These types are opaque to the user. */ -typedef struct spvc_context_s *spvc_context; -typedef struct spvc_parsed_ir_s *spvc_parsed_ir; -typedef struct spvc_compiler_s *spvc_compiler; -typedef struct spvc_compiler_options_s *spvc_compiler_options; -typedef struct spvc_resources_s *spvc_resources; -struct spvc_type_s; -typedef const struct spvc_type_s *spvc_type; -typedef struct spvc_constant_s *spvc_constant; -struct spvc_set_s; -typedef const struct spvc_set_s *spvc_set; + SPVC_PUBLIC_API void spvc_get_version(unsigned *major, unsigned *minor, unsigned *patch); + + /* Gets a human readable version string to identify which commit a particular binary was created from. */ + SPVC_PUBLIC_API const char *spvc_get_commit_revision_and_timestamp(void); + + /* These types are opaque to the user. 
*/ + typedef struct spvc_context_s *spvc_context; + typedef struct spvc_parsed_ir_s *spvc_parsed_ir; + typedef struct spvc_compiler_s *spvc_compiler; + typedef struct spvc_compiler_options_s *spvc_compiler_options; + typedef struct spvc_resources_s *spvc_resources; + struct spvc_type_s; + typedef const struct spvc_type_s *spvc_type; + typedef struct spvc_constant_s *spvc_constant; + struct spvc_set_s; + typedef const struct spvc_set_s *spvc_set; -/* + /* * Shallow typedefs. All SPIR-V IDs are plain 32-bit numbers, but this helps communicate which data is used. * Maps to a SPIRType. */ -typedef SpvId spvc_type_id; -/* Maps to a SPIRVariable. */ -typedef SpvId spvc_variable_id; -/* Maps to a SPIRConstant. */ -typedef SpvId spvc_constant_id; - -/* See C++ API. */ -typedef struct spvc_reflected_resource -{ - spvc_variable_id id; - spvc_type_id base_type_id; - spvc_type_id type_id; - const char *name; -} spvc_reflected_resource; - -typedef struct spvc_reflected_builtin_resource -{ - SpvBuiltIn builtin; - spvc_type_id value_type_id; - spvc_reflected_resource resource; -} spvc_reflected_builtin_resource; - -/* See C++ API. */ -typedef struct spvc_entry_point -{ - SpvExecutionModel execution_model; - const char *name; -} spvc_entry_point; - -/* See C++ API. */ -typedef struct spvc_combined_image_sampler -{ - spvc_variable_id combined_id; - spvc_variable_id image_id; - spvc_variable_id sampler_id; -} spvc_combined_image_sampler; - -/* See C++ API. */ -typedef struct spvc_specialization_constant -{ - spvc_constant_id id; - unsigned constant_id; -} spvc_specialization_constant; + typedef SpvId spvc_type_id; + /* Maps to a SPIRVariable. */ + typedef SpvId spvc_variable_id; + /* Maps to a SPIRConstant. */ + typedef SpvId spvc_constant_id; + + /* See C++ API. 
*/ + typedef struct spvc_reflected_resource + { + spvc_variable_id id; + spvc_type_id base_type_id; + spvc_type_id type_id; + const char *name; + } spvc_reflected_resource; + + typedef struct spvc_reflected_builtin_resource + { + SpvBuiltIn builtin; + spvc_type_id value_type_id; + spvc_reflected_resource resource; + } spvc_reflected_builtin_resource; + + /* See C++ API. */ + typedef struct spvc_entry_point + { + SpvExecutionModel execution_model; + const char *name; + } spvc_entry_point; + + /* See C++ API. */ + typedef struct spvc_combined_image_sampler + { + spvc_variable_id combined_id; + spvc_variable_id image_id; + spvc_variable_id sampler_id; + } spvc_combined_image_sampler; + + /* See C++ API. */ + typedef struct spvc_specialization_constant + { + spvc_constant_id id; + unsigned constant_id; + } spvc_specialization_constant; + + /* See C++ API. */ + typedef struct spvc_buffer_range + { + unsigned index; + size_t offset; + size_t range; + } spvc_buffer_range; + + /* See C++ API. */ + typedef struct spvc_hlsl_root_constants + { + unsigned start; + unsigned end; + unsigned binding; + unsigned space; + } spvc_hlsl_root_constants; + + /* See C++ API. */ + typedef struct spvc_hlsl_vertex_attribute_remap + { + unsigned location; + const char *semantic; + } spvc_hlsl_vertex_attribute_remap; -/* See C++ API. */ -typedef struct spvc_buffer_range -{ - unsigned index; - size_t offset; - size_t range; -} spvc_buffer_range; - -/* See C++ API. */ -typedef struct spvc_hlsl_root_constants -{ - unsigned start; - unsigned end; - unsigned binding; - unsigned space; -} spvc_hlsl_root_constants; - -/* See C++ API. */ -typedef struct spvc_hlsl_vertex_attribute_remap -{ - unsigned location; - const char *semantic; -} spvc_hlsl_vertex_attribute_remap; - -/* + /* * Be compatible with non-C99 compilers, which do not have stdbool. * Only recent MSVC compilers supports this for example, and ideally SPIRV-Cross should be linkable * from a wide range of compilers in its C wrapper. 
*/ -typedef unsigned char spvc_bool; + typedef unsigned char spvc_bool; #define SPVC_TRUE ((spvc_bool)1) #define SPVC_FALSE ((spvc_bool)0) -typedef enum spvc_result -{ - /* Success. */ - SPVC_SUCCESS = 0, + typedef enum spvc_result + { + /* Success. */ + SPVC_SUCCESS = 0, - /* The SPIR-V is invalid. Should have been caught by validation ideally. */ - SPVC_ERROR_INVALID_SPIRV = -1, + /* The SPIR-V is invalid. Should have been caught by validation ideally. */ + SPVC_ERROR_INVALID_SPIRV = -1, - /* The SPIR-V might be valid or invalid, but SPIRV-Cross currently cannot correctly translate this to your target language. */ - SPVC_ERROR_UNSUPPORTED_SPIRV = -2, + /* The SPIR-V might be valid or invalid, but SPIRV-Cross currently cannot correctly translate this to your target language. */ + SPVC_ERROR_UNSUPPORTED_SPIRV = -2, - /* If for some reason we hit this, new or malloc failed. */ - SPVC_ERROR_OUT_OF_MEMORY = -3, + /* If for some reason we hit this, new or malloc failed. */ + SPVC_ERROR_OUT_OF_MEMORY = -3, - /* Invalid API argument. */ - SPVC_ERROR_INVALID_ARGUMENT = -4, + /* Invalid API argument. */ + SPVC_ERROR_INVALID_ARGUMENT = -4, - SPVC_ERROR_INT_MAX = 0x7fffffff -} spvc_result; + SPVC_ERROR_INT_MAX = 0x7fffffff + } spvc_result; -typedef enum spvc_capture_mode -{ - /* The Parsed IR payload will be copied, and the handle can be reused to create other compiler instances. */ - SPVC_CAPTURE_MODE_COPY = 0, + typedef enum spvc_capture_mode + { + /* The Parsed IR payload will be copied, and the handle can be reused to create other compiler instances. */ + SPVC_CAPTURE_MODE_COPY = 0, - /* + /* * The payload will now be owned by the compiler. * parsed_ir should now be considered a dead blob and must not be used further. * This is optimal for performance and should be the go-to option. 
*/ - SPVC_CAPTURE_MODE_TAKE_OWNERSHIP = 1, - - SPVC_CAPTURE_MODE_INT_MAX = 0x7fffffff -} spvc_capture_mode; - -typedef enum spvc_backend -{ - /* This backend can only perform reflection, no compiler options are supported. Maps to spirv_cross::Compiler. */ - SPVC_BACKEND_NONE = 0, - SPVC_BACKEND_GLSL = 1, /* spirv_cross::CompilerGLSL */ - SPVC_BACKEND_HLSL = 2, /* CompilerHLSL */ - SPVC_BACKEND_MSL = 3, /* CompilerMSL */ - SPVC_BACKEND_CPP = 4, /* CompilerCPP */ - SPVC_BACKEND_JSON = 5, /* CompilerReflection w/ JSON backend */ - SPVC_BACKEND_INT_MAX = 0x7fffffff -} spvc_backend; - -/* Maps to C++ API. */ -typedef enum spvc_resource_type -{ - SPVC_RESOURCE_TYPE_UNKNOWN = 0, - SPVC_RESOURCE_TYPE_UNIFORM_BUFFER = 1, - SPVC_RESOURCE_TYPE_STORAGE_BUFFER = 2, - SPVC_RESOURCE_TYPE_STAGE_INPUT = 3, - SPVC_RESOURCE_TYPE_STAGE_OUTPUT = 4, - SPVC_RESOURCE_TYPE_SUBPASS_INPUT = 5, - SPVC_RESOURCE_TYPE_STORAGE_IMAGE = 6, - SPVC_RESOURCE_TYPE_SAMPLED_IMAGE = 7, - SPVC_RESOURCE_TYPE_ATOMIC_COUNTER = 8, - SPVC_RESOURCE_TYPE_PUSH_CONSTANT = 9, - SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, - SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, - SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, - SPVC_RESOURCE_TYPE_RAY_QUERY = 13, - SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14, - SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15, - SPVC_RESOURCE_TYPE_TENSOR = 16, - SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff -} spvc_resource_type; - -typedef enum spvc_builtin_resource_type -{ - SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0, - SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1, - SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2, - SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff -} spvc_builtin_resource_type; - -/* Maps to spirv_cross::SPIRType::BaseType. 
*/ -typedef enum spvc_basetype -{ - SPVC_BASETYPE_UNKNOWN = 0, - SPVC_BASETYPE_VOID = 1, - SPVC_BASETYPE_BOOLEAN = 2, - SPVC_BASETYPE_INT8 = 3, - SPVC_BASETYPE_UINT8 = 4, - SPVC_BASETYPE_INT16 = 5, - SPVC_BASETYPE_UINT16 = 6, - SPVC_BASETYPE_INT32 = 7, - SPVC_BASETYPE_UINT32 = 8, - SPVC_BASETYPE_INT64 = 9, - SPVC_BASETYPE_UINT64 = 10, - SPVC_BASETYPE_ATOMIC_COUNTER = 11, - SPVC_BASETYPE_FP16 = 12, - SPVC_BASETYPE_FP32 = 13, - SPVC_BASETYPE_FP64 = 14, - SPVC_BASETYPE_STRUCT = 15, - SPVC_BASETYPE_IMAGE = 16, - SPVC_BASETYPE_SAMPLED_IMAGE = 17, - SPVC_BASETYPE_SAMPLER = 18, - SPVC_BASETYPE_ACCELERATION_STRUCTURE = 19, - - SPVC_BASETYPE_INT_MAX = 0x7fffffff -} spvc_basetype; + SPVC_CAPTURE_MODE_TAKE_OWNERSHIP = 1, + + SPVC_CAPTURE_MODE_INT_MAX = 0x7fffffff + } spvc_capture_mode; + + typedef enum spvc_backend + { + /* This backend can only perform reflection, no compiler options are supported. Maps to spirv_cross::Compiler. */ + SPVC_BACKEND_NONE = 0, + SPVC_BACKEND_GLSL = 1, /* spirv_cross::CompilerGLSL */ + SPVC_BACKEND_HLSL = 2, /* CompilerHLSL */ + SPVC_BACKEND_MSL = 3, /* CompilerMSL */ + SPVC_BACKEND_CPP = 4, /* CompilerCPP */ + SPVC_BACKEND_JSON = 5, /* CompilerReflection w/ JSON backend */ + SPVC_BACKEND_OPENCL = 6, /* CompilerOpenCL */ + SPVC_BACKEND_INT_MAX = 0x7fffffff + } spvc_backend; + + /* Maps to C++ API. 
*/ + typedef enum spvc_resource_type + { + SPVC_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_RESOURCE_TYPE_UNIFORM_BUFFER = 1, + SPVC_RESOURCE_TYPE_STORAGE_BUFFER = 2, + SPVC_RESOURCE_TYPE_STAGE_INPUT = 3, + SPVC_RESOURCE_TYPE_STAGE_OUTPUT = 4, + SPVC_RESOURCE_TYPE_SUBPASS_INPUT = 5, + SPVC_RESOURCE_TYPE_STORAGE_IMAGE = 6, + SPVC_RESOURCE_TYPE_SAMPLED_IMAGE = 7, + SPVC_RESOURCE_TYPE_ATOMIC_COUNTER = 8, + SPVC_RESOURCE_TYPE_PUSH_CONSTANT = 9, + SPVC_RESOURCE_TYPE_SEPARATE_IMAGE = 10, + SPVC_RESOURCE_TYPE_SEPARATE_SAMPLERS = 11, + SPVC_RESOURCE_TYPE_ACCELERATION_STRUCTURE = 12, + SPVC_RESOURCE_TYPE_RAY_QUERY = 13, + SPVC_RESOURCE_TYPE_SHADER_RECORD_BUFFER = 14, + SPVC_RESOURCE_TYPE_GL_PLAIN_UNIFORM = 15, + SPVC_RESOURCE_TYPE_TENSOR = 16, + SPVC_RESOURCE_TYPE_INT_MAX = 0x7fffffff + } spvc_resource_type; + + typedef enum spvc_builtin_resource_type + { + SPVC_BUILTIN_RESOURCE_TYPE_UNKNOWN = 0, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_INPUT = 1, + SPVC_BUILTIN_RESOURCE_TYPE_STAGE_OUTPUT = 2, + SPVC_BUILTIN_RESOURCE_TYPE_INT_MAX = 0x7fffffff + } spvc_builtin_resource_type; + + /* Maps to spirv_cross::SPIRType::BaseType. 
*/ + typedef enum spvc_basetype + { + SPVC_BASETYPE_UNKNOWN = 0, + SPVC_BASETYPE_VOID = 1, + SPVC_BASETYPE_BOOLEAN = 2, + SPVC_BASETYPE_INT8 = 3, + SPVC_BASETYPE_UINT8 = 4, + SPVC_BASETYPE_INT16 = 5, + SPVC_BASETYPE_UINT16 = 6, + SPVC_BASETYPE_INT32 = 7, + SPVC_BASETYPE_UINT32 = 8, + SPVC_BASETYPE_INT64 = 9, + SPVC_BASETYPE_UINT64 = 10, + SPVC_BASETYPE_ATOMIC_COUNTER = 11, + SPVC_BASETYPE_FP16 = 12, + SPVC_BASETYPE_FP32 = 13, + SPVC_BASETYPE_FP64 = 14, + SPVC_BASETYPE_STRUCT = 15, + SPVC_BASETYPE_IMAGE = 16, + SPVC_BASETYPE_SAMPLED_IMAGE = 17, + SPVC_BASETYPE_SAMPLER = 18, + SPVC_BASETYPE_ACCELERATION_STRUCTURE = 19, + + SPVC_BASETYPE_INT_MAX = 0x7fffffff + } spvc_basetype; #define SPVC_COMPILER_OPTION_COMMON_BIT 0x1000000 #define SPVC_COMPILER_OPTION_GLSL_BIT 0x2000000 @@ -275,143 +277,143 @@ typedef enum spvc_basetype #define SPVC_MAKE_MSL_VERSION(major, minor, patch) ((major) * 10000 + (minor) * 100 + (patch)) -/* Maps to C++ API. */ -typedef enum spvc_msl_platform -{ - SPVC_MSL_PLATFORM_IOS = 0, - SPVC_MSL_PLATFORM_MACOS = 1, - SPVC_MSL_PLATFORM_MAX_INT = 0x7fffffff -} spvc_msl_platform; - -/* Maps to C++ API. */ -typedef enum spvc_msl_index_type -{ - SPVC_MSL_INDEX_TYPE_NONE = 0, - SPVC_MSL_INDEX_TYPE_UINT16 = 1, - SPVC_MSL_INDEX_TYPE_UINT32 = 2, - SPVC_MSL_INDEX_TYPE_MAX_INT = 0x7fffffff -} spvc_msl_index_type; - -/* Maps to C++ API. */ -typedef enum spvc_msl_shader_variable_format -{ - SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, - SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, - SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, - SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, - SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, - - /* Deprecated names. 
*/ - SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, - SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, - SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, - SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, - SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, - SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, - SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16, - SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32, - - - SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff -} spvc_msl_shader_variable_format, spvc_msl_shader_input_format, spvc_msl_vertex_format; - -/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var. */ -typedef struct spvc_msl_vertex_attribute -{ - unsigned location; - - /* Obsolete, do not use. Only lingers on for ABI compatibility. */ - unsigned msl_buffer; - /* Obsolete, do not use. Only lingers on for ABI compatibility. */ - unsigned msl_offset; - /* Obsolete, do not use. Only lingers on for ABI compatibility. */ - unsigned msl_stride; - /* Obsolete, do not use. Only lingers on for ABI compatibility. */ - spvc_bool per_instance; - - spvc_msl_vertex_format format; - SpvBuiltIn builtin; -} spvc_msl_vertex_attribute; + /* Maps to C++ API. */ + typedef enum spvc_msl_platform + { + SPVC_MSL_PLATFORM_IOS = 0, + SPVC_MSL_PLATFORM_MACOS = 1, + SPVC_MSL_PLATFORM_MAX_INT = 0x7fffffff + } spvc_msl_platform; + + /* Maps to C++ API. */ + typedef enum spvc_msl_index_type + { + SPVC_MSL_INDEX_TYPE_NONE = 0, + SPVC_MSL_INDEX_TYPE_UINT16 = 1, + SPVC_MSL_INDEX_TYPE_UINT32 = 2, + SPVC_MSL_INDEX_TYPE_MAX_INT = 0x7fffffff + } spvc_msl_index_type; + + /* Maps to C++ API. 
*/ + typedef enum spvc_msl_shader_variable_format + { + SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER = 0, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8 = 1, + SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16 = 2, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16 = 3, + SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32 = 4, + + /* Deprecated names. */ + SPVC_MSL_VERTEX_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_VERTEX_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_VERTEX_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_OTHER = SPVC_MSL_SHADER_VARIABLE_FORMAT_OTHER, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT8 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT8, + SPVC_MSL_SHADER_INPUT_FORMAT_UINT16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_UINT16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY16 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY16, + SPVC_MSL_SHADER_INPUT_FORMAT_ANY32 = SPVC_MSL_SHADER_VARIABLE_FORMAT_ANY32, + + SPVC_MSL_SHADER_INPUT_FORMAT_INT_MAX = 0x7fffffff + } spvc_msl_shader_variable_format, + spvc_msl_shader_input_format, spvc_msl_vertex_format; + + /* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var. */ + typedef struct spvc_msl_vertex_attribute + { + unsigned location; + + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_buffer; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_offset; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + unsigned msl_stride; + /* Obsolete, do not use. Only lingers on for ABI compatibility. */ + spvc_bool per_instance; + + spvc_msl_vertex_format format; + SpvBuiltIn builtin; + } spvc_msl_vertex_attribute; -/* + /* * Initializes the vertex attribute struct. */ -SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); + SPVC_PUBLIC_API void spvc_msl_vertex_attribute_init(spvc_msl_vertex_attribute *attr); -/* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var_2. 
*/ -typedef struct spvc_msl_shader_interface_var -{ - unsigned location; - spvc_msl_vertex_format format; - SpvBuiltIn builtin; - unsigned vecsize; -} spvc_msl_shader_interface_var, spvc_msl_shader_input; + /* Maps to C++ API. Deprecated; use spvc_msl_shader_interface_var_2. */ + typedef struct spvc_msl_shader_interface_var + { + unsigned location; + spvc_msl_vertex_format format; + SpvBuiltIn builtin; + unsigned vecsize; + } spvc_msl_shader_interface_var, spvc_msl_shader_input; -/* + /* * Initializes the shader input struct. * Deprecated. Use spvc_msl_shader_interface_var_init_2(). */ -SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var); -/* + SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init(spvc_msl_shader_interface_var *var); + /* * Deprecated. Use spvc_msl_shader_interface_var_init_2(). */ -SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); + SPVC_PUBLIC_API void spvc_msl_shader_input_init(spvc_msl_shader_input *input); + + /* Maps to C++ API. */ + typedef enum spvc_msl_shader_variable_rate + { + SPVC_MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, + SPVC_MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, + + SPVC_MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, + } spvc_msl_shader_variable_rate; + + /* Maps to C++ API. */ + typedef struct spvc_msl_shader_interface_var_2 + { + unsigned location; + spvc_msl_shader_variable_format format; + SpvBuiltIn builtin; + unsigned vecsize; + spvc_msl_shader_variable_rate rate; + } spvc_msl_shader_interface_var_2; -/* Maps to C++ API. */ -typedef enum spvc_msl_shader_variable_rate -{ - SPVC_MSL_SHADER_VARIABLE_RATE_PER_VERTEX = 0, - SPVC_MSL_SHADER_VARIABLE_RATE_PER_PRIMITIVE = 1, - SPVC_MSL_SHADER_VARIABLE_RATE_PER_PATCH = 2, - - SPVC_MSL_SHADER_VARIABLE_RATE_INT_MAX = 0x7fffffff, -} spvc_msl_shader_variable_rate; - -/* Maps to C++ API. 
*/ -typedef struct spvc_msl_shader_interface_var_2 -{ - unsigned location; - spvc_msl_shader_variable_format format; - SpvBuiltIn builtin; - unsigned vecsize; - spvc_msl_shader_variable_rate rate; -} spvc_msl_shader_interface_var_2; - -/* + /* * Initializes the shader interface variable struct. */ -SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var); + SPVC_PUBLIC_API void spvc_msl_shader_interface_var_init_2(spvc_msl_shader_interface_var_2 *var); -/* Maps to C++ API. + /* Maps to C++ API. * Deprecated. Use spvc_msl_resource_binding_2. */ -typedef struct spvc_msl_resource_binding -{ - SpvExecutionModel stage; - unsigned desc_set; - unsigned binding; - unsigned msl_buffer; - unsigned msl_texture; - unsigned msl_sampler; -} spvc_msl_resource_binding; - -typedef struct spvc_msl_resource_binding_2 -{ - SpvExecutionModel stage; - unsigned desc_set; - unsigned binding; - unsigned count; - unsigned msl_buffer; - unsigned msl_texture; - unsigned msl_sampler; -} spvc_msl_resource_binding_2; + typedef struct spvc_msl_resource_binding + { + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + unsigned msl_buffer; + unsigned msl_texture; + unsigned msl_sampler; + } spvc_msl_resource_binding; + + typedef struct spvc_msl_resource_binding_2 + { + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; + unsigned count; + unsigned msl_buffer; + unsigned msl_texture; + unsigned msl_sampler; + } spvc_msl_resource_binding_2; -/* + /* * Initializes the resource binding struct. * The defaults are non-zero. * Deprecated: Use spvc_msl_resource_binding_init_2. 
*/ -SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding); -SPVC_PUBLIC_API void spvc_msl_resource_binding_init_2(spvc_msl_resource_binding_2 *binding); + SPVC_PUBLIC_API void spvc_msl_resource_binding_init(spvc_msl_resource_binding *binding); + SPVC_PUBLIC_API void spvc_msl_resource_binding_init_2(spvc_msl_resource_binding_2 *binding); #define SPVC_MSL_PUSH_CONSTANT_DESC_SET (~(0u)) #define SPVC_MSL_PUSH_CONSTANT_BINDING (0) @@ -422,707 +424,735 @@ SPVC_PUBLIC_API void spvc_msl_resource_binding_init_2(spvc_msl_resource_binding_ /* Obsolete. Sticks around for backwards compatibility. */ #define SPVC_MSL_AUX_BUFFER_STRUCT_VERSION 1 -/* Runtime check for incompatibility. Obsolete. */ -SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void); - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_coord -{ - SPVC_MSL_SAMPLER_COORD_NORMALIZED = 0, - SPVC_MSL_SAMPLER_COORD_PIXEL = 1, - SPVC_MSL_SAMPLER_INT_MAX = 0x7fffffff -} spvc_msl_sampler_coord; - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_filter -{ - SPVC_MSL_SAMPLER_FILTER_NEAREST = 0, - SPVC_MSL_SAMPLER_FILTER_LINEAR = 1, - SPVC_MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff -} spvc_msl_sampler_filter; + /* Runtime check for incompatibility. Obsolete. */ + SPVC_PUBLIC_API unsigned spvc_msl_get_aux_buffer_struct_version(void); + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_coord + { + SPVC_MSL_SAMPLER_COORD_NORMALIZED = 0, + SPVC_MSL_SAMPLER_COORD_PIXEL = 1, + SPVC_MSL_SAMPLER_INT_MAX = 0x7fffffff + } spvc_msl_sampler_coord; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_filter + { + SPVC_MSL_SAMPLER_FILTER_NEAREST = 0, + SPVC_MSL_SAMPLER_FILTER_LINEAR = 1, + SPVC_MSL_SAMPLER_FILTER_INT_MAX = 0x7fffffff + } spvc_msl_sampler_filter; + + /* Maps to C++ API. 
*/ + typedef enum spvc_msl_sampler_mip_filter + { + SPVC_MSL_SAMPLER_MIP_FILTER_NONE = 0, + SPVC_MSL_SAMPLER_MIP_FILTER_NEAREST = 1, + SPVC_MSL_SAMPLER_MIP_FILTER_LINEAR = 2, + SPVC_MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff + } spvc_msl_sampler_mip_filter; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_address + { + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, + SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, + SPVC_MSL_SAMPLER_ADDRESS_REPEAT = 3, + SPVC_MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, + SPVC_MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff + } spvc_msl_sampler_address; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_compare_func + { + SPVC_MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS = 1, + SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, + SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, + SPVC_MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, + SPVC_MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, + SPVC_MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, + SPVC_MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff + } spvc_msl_sampler_compare_func; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_border_color + { + SPVC_MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, + SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff + } spvc_msl_sampler_border_color; + + /* Maps to C++ API. */ + typedef enum spvc_msl_format_resolution + { + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff + } spvc_msl_format_resolution; + + /* Maps to C++ API. */ + typedef enum spvc_msl_chroma_location + { + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff + } spvc_msl_chroma_location; + + /* Maps to C++ API. 
*/ + typedef enum spvc_msl_component_swizzle + { + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff + } spvc_msl_component_swizzle; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_ycbcr_model_conversion + { + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff + } spvc_msl_sampler_ycbcr_model_conversion; + + /* Maps to C++ API. */ + typedef enum spvc_msl_sampler_ycbcr_range + { + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff + } spvc_msl_sampler_ycbcr_range; + + /* Maps to C++ API. */ + typedef struct spvc_msl_constexpr_sampler + { + spvc_msl_sampler_coord coord; + spvc_msl_sampler_filter min_filter; + spvc_msl_sampler_filter mag_filter; + spvc_msl_sampler_mip_filter mip_filter; + spvc_msl_sampler_address s_address; + spvc_msl_sampler_address t_address; + spvc_msl_sampler_address r_address; + spvc_msl_sampler_compare_func compare_func; + spvc_msl_sampler_border_color border_color; + float lod_clamp_min; + float lod_clamp_max; + int max_anisotropy; + + spvc_bool compare_enable; + spvc_bool lod_clamp_enable; + spvc_bool anisotropy_enable; + } spvc_msl_constexpr_sampler; -/* Maps to C++ API.
*/ -typedef enum spvc_msl_sampler_mip_filter -{ - SPVC_MSL_SAMPLER_MIP_FILTER_NONE = 0, - SPVC_MSL_SAMPLER_MIP_FILTER_NEAREST = 1, - SPVC_MSL_SAMPLER_MIP_FILTER_LINEAR = 2, - SPVC_MSL_SAMPLER_MIP_FILTER_INT_MAX = 0x7fffffff -} spvc_msl_sampler_mip_filter; - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_address -{ - SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_ZERO = 0, - SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_EDGE = 1, - SPVC_MSL_SAMPLER_ADDRESS_CLAMP_TO_BORDER = 2, - SPVC_MSL_SAMPLER_ADDRESS_REPEAT = 3, - SPVC_MSL_SAMPLER_ADDRESS_MIRRORED_REPEAT = 4, - SPVC_MSL_SAMPLER_ADDRESS_INT_MAX = 0x7fffffff -} spvc_msl_sampler_address; - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_compare_func -{ - SPVC_MSL_SAMPLER_COMPARE_FUNC_NEVER = 0, - SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS = 1, - SPVC_MSL_SAMPLER_COMPARE_FUNC_LESS_EQUAL = 2, - SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER = 3, - SPVC_MSL_SAMPLER_COMPARE_FUNC_GREATER_EQUAL = 4, - SPVC_MSL_SAMPLER_COMPARE_FUNC_EQUAL = 5, - SPVC_MSL_SAMPLER_COMPARE_FUNC_NOT_EQUAL = 6, - SPVC_MSL_SAMPLER_COMPARE_FUNC_ALWAYS = 7, - SPVC_MSL_SAMPLER_COMPARE_FUNC_INT_MAX = 0x7fffffff -} spvc_msl_sampler_compare_func; - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_border_color -{ - SPVC_MSL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, - SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, - SPVC_MSL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, - SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff -} spvc_msl_sampler_border_color; - -/* Maps to C++ API. */ -typedef enum spvc_msl_format_resolution -{ - SPVC_MSL_FORMAT_RESOLUTION_444 = 0, - SPVC_MSL_FORMAT_RESOLUTION_422, - SPVC_MSL_FORMAT_RESOLUTION_420, - SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff -} spvc_msl_format_resolution; - -/* Maps to C++ API. */ -typedef enum spvc_msl_chroma_location -{ - SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, - SPVC_MSL_CHROMA_LOCATION_MIDPOINT, - SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff -} spvc_msl_chroma_location; - -/* Maps to C++ API. 
*/ -typedef enum spvc_msl_component_swizzle -{ - SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, - SPVC_MSL_COMPONENT_SWIZZLE_ZERO, - SPVC_MSL_COMPONENT_SWIZZLE_ONE, - SPVC_MSL_COMPONENT_SWIZZLE_R, - SPVC_MSL_COMPONENT_SWIZZLE_G, - SPVC_MSL_COMPONENT_SWIZZLE_B, - SPVC_MSL_COMPONENT_SWIZZLE_A, - SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff -} spvc_msl_component_swizzle; - -/* Maps to C++ API. */ -typedef enum spvc_msl_sampler_ycbcr_model_conversion -{ - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, - SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff -} spvc_msl_sampler_ycbcr_model_conversion; - -/* Maps to C+ API. */ -typedef enum spvc_msl_sampler_ycbcr_range -{ - SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, - SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, - SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff -} spvc_msl_sampler_ycbcr_range; - -/* Maps to C++ API. */ -typedef struct spvc_msl_constexpr_sampler -{ - spvc_msl_sampler_coord coord; - spvc_msl_sampler_filter min_filter; - spvc_msl_sampler_filter mag_filter; - spvc_msl_sampler_mip_filter mip_filter; - spvc_msl_sampler_address s_address; - spvc_msl_sampler_address t_address; - spvc_msl_sampler_address r_address; - spvc_msl_sampler_compare_func compare_func; - spvc_msl_sampler_border_color border_color; - float lod_clamp_min; - float lod_clamp_max; - int max_anisotropy; - - spvc_bool compare_enable; - spvc_bool lod_clamp_enable; - spvc_bool anisotropy_enable; -} spvc_msl_constexpr_sampler; - -/* + /* * Initializes the constexpr sampler struct. * The defaults are non-zero. 
*/ -SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); + SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); + + /* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. */ + typedef struct spvc_msl_sampler_ycbcr_conversion + { + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; + } spvc_msl_sampler_ycbcr_conversion; -/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. */ -typedef struct spvc_msl_sampler_ycbcr_conversion -{ - unsigned planes; - spvc_msl_format_resolution resolution; - spvc_msl_sampler_filter chroma_filter; - spvc_msl_chroma_location x_chroma_offset; - spvc_msl_chroma_location y_chroma_offset; - spvc_msl_component_swizzle swizzle[4]; - spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; - spvc_msl_sampler_ycbcr_range ycbcr_range; - unsigned bpc; -} spvc_msl_sampler_ycbcr_conversion; - -/* + /* * Initializes the constexpr sampler struct. * The defaults are non-zero. */ -SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); - -/* Maps to C++ API. 
*/ -typedef enum spvc_hlsl_binding_flag_bits -{ - SPVC_HLSL_BINDING_AUTO_NONE_BIT = 0, - SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, - SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, - SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, - SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, - SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, - SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff -} spvc_hlsl_binding_flag_bits; -typedef unsigned spvc_hlsl_binding_flags; + SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + + /* Maps to C++ API. */ + typedef enum spvc_hlsl_binding_flag_bits + { + SPVC_HLSL_BINDING_AUTO_NONE_BIT = 0, + SPVC_HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT = 1 << 0, + SPVC_HLSL_BINDING_AUTO_CBV_BIT = 1 << 1, + SPVC_HLSL_BINDING_AUTO_SRV_BIT = 1 << 2, + SPVC_HLSL_BINDING_AUTO_UAV_BIT = 1 << 3, + SPVC_HLSL_BINDING_AUTO_SAMPLER_BIT = 1 << 4, + SPVC_HLSL_BINDING_AUTO_ALL = 0x7fffffff + } spvc_hlsl_binding_flag_bits; + typedef unsigned spvc_hlsl_binding_flags; #define SPVC_HLSL_PUSH_CONSTANT_DESC_SET (~(0u)) #define SPVC_HLSL_PUSH_CONSTANT_BINDING (0) -/* Maps to C++ API. */ -typedef struct spvc_hlsl_resource_binding_mapping -{ - unsigned register_space; - unsigned register_binding; -} spvc_hlsl_resource_binding_mapping; + /* Maps to C++ API. */ + typedef struct spvc_hlsl_resource_binding_mapping + { + unsigned register_space; + unsigned register_binding; + } spvc_hlsl_resource_binding_mapping; -typedef struct spvc_hlsl_resource_binding -{ - SpvExecutionModel stage; - unsigned desc_set; - unsigned binding; + typedef struct spvc_hlsl_resource_binding + { + SpvExecutionModel stage; + unsigned desc_set; + unsigned binding; - spvc_hlsl_resource_binding_mapping cbv, uav, srv, sampler; -} spvc_hlsl_resource_binding; + spvc_hlsl_resource_binding_mapping cbv, uav, srv, sampler; + } spvc_hlsl_resource_binding; -/* + /* * Initializes the resource binding struct. * The defaults are non-zero. 
*/ -SPVC_PUBLIC_API void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding); - -/* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ -typedef enum spvc_compiler_option -{ - SPVC_COMPILER_OPTION_UNKNOWN = 0, + SPVC_PUBLIC_API void spvc_hlsl_resource_binding_init(spvc_hlsl_resource_binding *binding); + + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ + typedef enum spvc_compiler_option + { + SPVC_COMPILER_OPTION_UNKNOWN = 0, - SPVC_COMPILER_OPTION_FORCE_TEMPORARY = 1 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS = 2 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION = 3 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_FLIP_VERTEX_Y = 4 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FORCE_TEMPORARY = 1 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLATTEN_MULTIDIMENSIONAL_ARRAYS = 2 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FIXUP_DEPTH_CONVENTION = 3 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_FLIP_VERTEX_Y = 4 | SPVC_COMPILER_OPTION_COMMON_BIT, + + SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE = 5 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS = 6 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION = 7 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VERSION = 8 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES = 9 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS = 10 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP = 11 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP = 12 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_SUPPORT_NONZERO_BASE_INSTANCE = 5 | 
SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_SEPARATE_SHADER_OBJECTS = 6 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_ENABLE_420PACK_EXTENSION = 7 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_VERSION = 8 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_ES = 9 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_VULKAN_SEMANTICS = 10 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_FLOAT_PRECISION_HIGHP = 11 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_ES_DEFAULT_INT_PRECISION_HIGHP = 12 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL = 13 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT = 14 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT = 15 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE = 16 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL = 13 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_POINT_SIZE_COMPAT = 14 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_POINT_COORD_COMPAT = 15 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_SUPPORT_NONZERO_BASE_VERTEX_BASE_INSTANCE = 16 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT, + + /* Obsolete, use SWIZZLE_BUFFER_INDEX instead. */ + SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_VERSION = 17 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_TEXEL_BUFFER_TEXTURE_WIDTH = 18 | SPVC_COMPILER_OPTION_MSL_BIT, - - /* Obsolete, use SWIZZLE_BUFFER_INDEX instead. 
*/ - SPVC_COMPILER_OPTION_MSL_AUX_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SWIZZLE_BUFFER_INDEX = 19 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX = 23 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX = 24 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN = 25 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION = 26 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER = 27 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES = 28 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS = 29 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT = 30 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_PLATFORM = 31 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS = 32 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_INDIRECT_PARAMS_BUFFER_INDEX = 20 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_OUTPUT_BUFFER_INDEX = 21 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_OUTPUT_BUFFER_INDEX = 22 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_TESS_FACTOR_OUTPUT_BUFFER_INDEX = 23 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_WORKGROUP_INDEX = 24 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_BUILTIN = 25 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_DISABLE_RASTERIZATION = 26 | SPVC_COMPILER_OPTION_MSL_BIT, - 
SPVC_COMPILER_OPTION_MSL_CAPTURE_OUTPUT_TO_BUFFER = 27 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SWIZZLE_TEXTURE_SAMPLES = 28 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_PAD_FRAGMENT_OUTPUT_COMPONENTS = 29 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_TESS_DOMAIN_ORIGIN_LOWER_LEFT = 30 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_PLATFORM = 31 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS = 32 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER = 33 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_EMIT_PUSH_CONSTANT_AS_UNIFORM_BUFFER = 33 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE = 34 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_TEXTURE_BUFFER_NATIVE = 34 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_EMIT_UNIFORM_BUFFER_AS_PLAIN_UNIFORMS = 35 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX = 36 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 
44 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_TEXTURE_1D_AS_2D = 44 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_BASE_INDEX_ZERO = 45 | SPVC_COMPILER_OPTION_MSL_BIT, + /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. */ + SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, - /* Obsolete. Use MSL_FRAMEBUFFER_FETCH_SUBPASS instead. 
*/ - SPVC_COMPILER_OPTION_MSL_IOS_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_FRAMEBUFFER_FETCH_SUBPASS = 46 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_INVARIANT_FP_MATH = 47 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_EMULATE_CUBEMAP_ARRAY = 48 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_DECORATION_BINDING = 49 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_FORCE_ACTIVE_ARGUMENT_BUFFER_RESOURCES = 50 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_FORCE_NATIVE_ARRAYS = 51 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION = 52 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_ENABLE_STORAGE_IMAGE_QUALIFIER_DEDUCTION = 52 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV = 53 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_FORCE_STORAGE_BUFFER_AS_UAV = 53 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES = 54 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_FORCE_ZERO_INITIALIZED_VARIABLES = 54 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV = 55 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_NONWRITABLE_UAV_TEXTURE_AS_SRV = 55 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK = 56 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN 
= 57 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN = 58 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING = 59 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_OUTPUT_MASK = 56 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_DEPTH_BUILTIN = 57 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_FRAG_STENCIL_REF_BUILTIN = 58 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_CLIP_DISTANCE_USER_VARYING = 59 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES = 60 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_ENABLE_16BIT_TYPES = 60 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP = 61 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX = 62 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX = 63 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION = 64 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE = 65 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_MULTI_PATCH_WORKGROUP = 61 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_INPUT_BUFFER_INDEX = 62 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_INDEX_BUFFER_INDEX = 63 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_VERTEX_FOR_TESSELLATION = 64 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_VERTEX_INDEX_TYPE = 65 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS = 66 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_FORCE_FLATTENED_IO_BLOCKS = 66 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING = 67 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT = 68 | 
SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT = 69 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID = 70 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_MULTIVIEW_LAYERED_RENDERING = 67 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ARRAYED_SUBPASS_INPUT = 68 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_R32UI_LINEAR_TEXTURE_ALIGNMENT = 69 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_R32UI_ALIGNMENT_CONSTANT_ID = 70 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_FLATTEN_MATRIX_VERTEX_INPUT_SEMANTICS = 71 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE = 76 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_IOS_USE_SIMDGROUP_FUNCTIONS = 72 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_EMULATE_SUBGROUPS = 73 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_FIXED_SUBGROUP_SIZE = 74 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_FORCE_SAMPLE_RATE_SHADING = 75 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_IOS_SUPPORT_BASE_VERTEX_INSTANCE = 76 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_OVR_MULTIVIEW_VIEW_COUNT = 77 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | SPVC_COMPILER_OPTION_COMMON_BIT, - SPVC_COMPILER_OPTION_RELAX_NAN_CHECKS = 78 | 
SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT = 79 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX = 80 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES = 81 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES = 82 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_RAW_BUFFER_TESE_INPUT = 79 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SHADER_PATCH_INPUT_BUFFER_INDEX = 80 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_MANUAL_HELPER_INVOCATION_UPDATES = 81 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_CHECK_DISCARDED_FRAG_STORES = 82 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND = 83 | SPVC_COMPILER_OPTION_GLSL_BIT, - SPVC_COMPILER_OPTION_GLSL_ENABLE_ROW_MAJOR_LOAD_WORKAROUND = 83 | SPVC_COMPILER_OPTION_GLSL_BIT, + SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD = 85 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION = 89 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD = 85 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT, - 
SPVC_COMPILER_OPTION_MSL_FORCE_FRAGMENT_WITH_SIDE_EFFECTS_EXECUTION = 89 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_HLSL_USE_ENTRY_POINT_NAME = 90 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_HLSL_PRESERVE_STRUCTURED_BUFFERS = 91 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_USE_ENTRY_POINT_NAME = 90 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_PRESERVE_STRUCTURED_BUFFERS = 91 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_MSL_AUTO_DISABLE_RASTERIZATION = 92 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_AUTO_DISABLE_RASTERIZATION = 92 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_DEFAULT = 93 | SPVC_COMPILER_OPTION_MSL_BIT, - SPVC_COMPILER_OPTION_MSL_ENABLE_POINT_SIZE_DEFAULT = 93 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_HLSL_USER_SEMANTIC = 94 | SPVC_COMPILER_OPTION_HLSL_BIT, - SPVC_COMPILER_OPTION_HLSL_USER_SEMANTIC = 94 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff + } spvc_compiler_option; - SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff -} spvc_compiler_option; - -/* + /* * Context is the highest-level API construct. * The context owns all memory allocations made by its child object hierarchy, including various non-opaque structs and strings. * This means that the API user only has to care about one "destroy" call ever when using the C API. * All pointers handed out by the APIs are only valid as long as the context * is alive and spvc_context_release_allocations has not been called. */ -SPVC_PUBLIC_API spvc_result spvc_context_create(spvc_context *context); + SPVC_PUBLIC_API spvc_result spvc_context_create(spvc_context *context); -/* Frees all memory allocations and objects associated with the context and its child objects. */ -SPVC_PUBLIC_API void spvc_context_destroy(spvc_context context); + /* Frees all memory allocations and objects associated with the context and its child objects. 
*/ + SPVC_PUBLIC_API void spvc_context_destroy(spvc_context context); -/* Frees all memory allocations and objects associated with the context and its child objects, but keeps the context alive. */ -SPVC_PUBLIC_API void spvc_context_release_allocations(spvc_context context); + /* Frees all memory allocations and objects associated with the context and its child objects, but keeps the context alive. */ + SPVC_PUBLIC_API void spvc_context_release_allocations(spvc_context context); -/* Get the string for the last error which was logged. */ -SPVC_PUBLIC_API const char *spvc_context_get_last_error_string(spvc_context context); + /* Get the string for the last error which was logged. */ + SPVC_PUBLIC_API const char *spvc_context_get_last_error_string(spvc_context context); -/* Get notified in a callback when an error triggers. Useful for debugging. */ -typedef void (*spvc_error_callback)(void *userdata, const char *error); -SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata); + /* Get notified in a callback when an error triggers. Useful for debugging. */ + typedef void (*spvc_error_callback)(void *userdata, const char *error); + SPVC_PUBLIC_API void spvc_context_set_error_callback(spvc_context context, spvc_error_callback cb, void *userdata); -/* SPIR-V parsing interface. Maps to Parser which then creates a ParsedIR, and that IR is extracted into the handle. */ -SPVC_PUBLIC_API spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count, - spvc_parsed_ir *parsed_ir); + /* SPIR-V parsing interface. Maps to Parser which then creates a ParsedIR, and that IR is extracted into the handle. */ + SPVC_PUBLIC_API spvc_result spvc_context_parse_spirv(spvc_context context, const SpvId *spirv, size_t word_count, + spvc_parsed_ir *parsed_ir); -/* + /* * Create a compiler backend. Capture mode controls if we construct by copy or move semantics. 
* It is always recommended to use SPVC_CAPTURE_MODE_TAKE_OWNERSHIP if you only intend to cross-compile the IR once. */ -SPVC_PUBLIC_API spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, - spvc_parsed_ir parsed_ir, spvc_capture_mode mode, - spvc_compiler *compiler); - -/* Maps directly to C++ API. */ -SPVC_PUBLIC_API unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler); - -/* Create compiler options, which will initialize defaults. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, - spvc_compiler_options *options); -/* Override options. Will return error if e.g. MSL options are used for the HLSL backend, etc. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, - spvc_compiler_option option, spvc_bool value); -SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, - spvc_compiler_option option, unsigned value); -/* Set compiler options. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, - spvc_compiler_options options); - -/* Compile IR into a string. *source is owned by the context, and caller must not free it themselves. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source); - -/* Maps to C++ API. 
*/ -SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line); -SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); -SPVC_PUBLIC_API size_t spvc_compiler_get_num_required_extensions(spvc_compiler compiler); -SPVC_PUBLIC_API const char *spvc_compiler_get_required_extension(spvc_compiler compiler, size_t index); -SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); - -SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); - -SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, - unsigned location, unsigned component); -SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin); + SPVC_PUBLIC_API spvc_result spvc_context_create_compiler(spvc_context context, spvc_backend backend, + spvc_parsed_ir parsed_ir, spvc_capture_mode mode, + spvc_compiler *compiler); + + /* Maps directly to C++ API. */ + SPVC_PUBLIC_API unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler); + + /* Create compiler options, which will initialize defaults. */ + SPVC_PUBLIC_API spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, + spvc_compiler_options *options); + /* Override options. Will return error if e.g. MSL options are used for the HLSL backend, etc. */ + SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_bool(spvc_compiler_options options, + spvc_compiler_option option, spvc_bool value); + SPVC_PUBLIC_API spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, + spvc_compiler_option option, unsigned value); + /* Set compiler options. */ + SPVC_PUBLIC_API spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, + spvc_compiler_options options); + + /* Compile IR into a string. 
*source is owned by the context, and caller must not free it themselves. */ + SPVC_PUBLIC_API spvc_result spvc_compiler_compile(spvc_compiler compiler, const char **source); + + /* Maps to C++ API. */ + SPVC_PUBLIC_API spvc_result spvc_compiler_add_header_line(spvc_compiler compiler, const char *line); + SPVC_PUBLIC_API spvc_result spvc_compiler_require_extension(spvc_compiler compiler, const char *ext); + SPVC_PUBLIC_API size_t spvc_compiler_get_num_required_extensions(spvc_compiler compiler); + SPVC_PUBLIC_API const char *spvc_compiler_get_required_extension(spvc_compiler compiler, size_t index); + SPVC_PUBLIC_API spvc_result spvc_compiler_flatten_buffer_block(spvc_compiler compiler, spvc_variable_id id); + + SPVC_PUBLIC_API spvc_bool spvc_compiler_variable_is_depth_or_compare(spvc_compiler compiler, spvc_variable_id id); + + SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_location(spvc_compiler compiler, unsigned location, + unsigned component); + SPVC_PUBLIC_API spvc_result spvc_compiler_mask_stage_output_by_builtin(spvc_compiler compiler, SpvBuiltIn builtin); -/* + /* * HLSL specifics. * Maps to C++ API. 
*/ -SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_root_constants_layout(spvc_compiler compiler, - const spvc_hlsl_root_constants *constant_info, - size_t count); -SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap(spvc_compiler compiler, - const spvc_hlsl_vertex_attribute_remap *remap, - size_t remaps); -SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); - -SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, - spvc_hlsl_binding_flags flags); - -SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, - const spvc_hlsl_resource_binding *binding); -SPVC_PUBLIC_API spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, - SpvExecutionModel model, - unsigned set, - unsigned binding); + SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_root_constants_layout( + spvc_compiler compiler, const spvc_hlsl_root_constants *constant_info, size_t count); + SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_vertex_attribute_remap( + spvc_compiler compiler, const spvc_hlsl_vertex_attribute_remap *remap, size_t remaps); + SPVC_PUBLIC_API spvc_variable_id spvc_compiler_hlsl_remap_num_workgroups_builtin(spvc_compiler compiler); -/* + SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_set_resource_binding_flags(spvc_compiler compiler, + spvc_hlsl_binding_flags flags); + + SPVC_PUBLIC_API spvc_result spvc_compiler_hlsl_add_resource_binding(spvc_compiler compiler, + const spvc_hlsl_resource_binding *binding); + SPVC_PUBLIC_API spvc_bool spvc_compiler_hlsl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, + unsigned set, unsigned binding); + + /* * MSL specifics. * Maps to C++ API. */ -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler); - -/* Obsolete. Renamed to needs_swizzle_buffer. 
*/ -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler); - -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, - const spvc_msl_vertex_attribute *attrs); -/* Deprecated; use spvc_compiler_msl_add_resource_binding_2(). */ -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, - const spvc_msl_resource_binding *binding); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding_2(spvc_compiler compiler, - const spvc_msl_resource_binding_2 *binding); -/* Deprecated; use spvc_compiler_msl_add_shader_input_2(). */ -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, - const spvc_msl_shader_interface_var *input); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, - const spvc_msl_shader_interface_var_2 *input); -/* Deprecated; use spvc_compiler_msl_add_shader_output_2(). 
*/ -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, - const spvc_msl_shader_interface_var *output); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, - const spvc_msl_shader_interface_var_2 *output); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, unsigned desc_set, spvc_bool device_address); - -/* Obsolete, use is_shader_input_used. */ -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location); -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location); - -SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, - SpvExecutionModel model, - unsigned set, - unsigned binding); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, 
unsigned location, unsigned components); - -SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id); -SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id); - -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index); - -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding); - -SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, const char *suffix); -SPVC_PUBLIC_API const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_rasterization_disabled(spvc_compiler compiler); + + /* Obsolete. Renamed to needs_swizzle_buffer. */ + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_aux_buffer(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_swizzle_buffer(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_buffer_size_buffer(spvc_compiler compiler); + + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_output_buffer(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_patch_output_buffer(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_needs_input_threadgroup_mem(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_vertex_attribute(spvc_compiler compiler, + const spvc_msl_vertex_attribute *attrs); + /* Deprecated; use spvc_compiler_msl_add_resource_binding_2(). 
*/ + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, + const spvc_msl_resource_binding *binding); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_resource_binding_2(spvc_compiler compiler, + const spvc_msl_resource_binding_2 *binding); + /* Deprecated; use spvc_compiler_msl_add_shader_input_2(). */ + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input(spvc_compiler compiler, + const spvc_msl_shader_interface_var *input); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_input_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *input); + /* Deprecated; use spvc_compiler_msl_add_shader_output_2(). */ + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output(spvc_compiler compiler, + const spvc_msl_shader_interface_var *output); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_shader_output_2(spvc_compiler compiler, + const spvc_msl_shader_interface_var_2 *output); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, + unsigned desc_set); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_argument_buffer_device_address_space(spvc_compiler compiler, + unsigned desc_set, + spvc_bool device_address); + + /* Obsolete, use is_shader_input_used. 
*/ + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_vertex_attribute_used(spvc_compiler compiler, unsigned location); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_input_used(spvc_compiler compiler, unsigned location); + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_shader_output_used(spvc_compiler compiler, unsigned location); + + SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compiler, SpvExecutionModel model, + unsigned set, unsigned binding); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding( + spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_ycbcr( + spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr( + spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv); + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, + unsigned location, + unsigned components); + + SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, + spvc_variable_id id); + SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, + spvc_variable_id id); + + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, + unsigned binding, unsigned index); + + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, + unsigned 
binding); + + SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_combined_sampler_suffix(spvc_compiler compiler, + const char *suffix); + SPVC_PUBLIC_API const char *spvc_compiler_msl_get_combined_sampler_suffix(spvc_compiler compiler); -/* + /* * Reflect resources. * Maps almost 1:1 to C++ API. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set); -SPVC_PUBLIC_API spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set); -SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, spvc_resources *resources); -SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler, - spvc_resources *resources, - spvc_set active); -SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, spvc_resource_type type, - const spvc_reflected_resource **resource_list, - size_t *resource_size); - -SPVC_PUBLIC_API spvc_result spvc_resources_get_builtin_resource_list_for_type( - spvc_resources resources, spvc_builtin_resource_type type, - const spvc_reflected_builtin_resource **resource_list, - size_t *resource_size); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_interface_variables(spvc_compiler compiler, spvc_set *set); + SPVC_PUBLIC_API spvc_result spvc_compiler_set_enabled_interface_variables(spvc_compiler compiler, spvc_set set); + SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources(spvc_compiler compiler, + spvc_resources *resources); + SPVC_PUBLIC_API spvc_result spvc_compiler_create_shader_resources_for_active_variables(spvc_compiler compiler, + spvc_resources *resources, + spvc_set active); + SPVC_PUBLIC_API spvc_result spvc_resources_get_resource_list_for_type(spvc_resources resources, + spvc_resource_type type, + const spvc_reflected_resource **resource_list, + size_t *resource_size); + + SPVC_PUBLIC_API spvc_result 
spvc_resources_get_builtin_resource_list_for_type( + spvc_resources resources, spvc_builtin_resource_type type, + const spvc_reflected_builtin_resource **resource_list, size_t *resource_size); -/* + /* * Decorations. * Maps to C++ API. */ -SPVC_PUBLIC_API void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, - unsigned argument); -SPVC_PUBLIC_API void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration, - const char *argument); -SPVC_PUBLIC_API void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument); -SPVC_PUBLIC_API void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, unsigned member_index, - SpvDecoration decoration, unsigned argument); -SPVC_PUBLIC_API void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id, - unsigned member_index, SpvDecoration decoration, - const char *argument); -SPVC_PUBLIC_API void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index, - const char *argument); -SPVC_PUBLIC_API void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); -SPVC_PUBLIC_API void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id, - unsigned member_index, SpvDecoration decoration); - -SPVC_PUBLIC_API spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); -SPVC_PUBLIC_API spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id, - unsigned member_index, SpvDecoration decoration); -SPVC_PUBLIC_API const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id); -SPVC_PUBLIC_API unsigned spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); -SPVC_PUBLIC_API const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id, - SpvDecoration decoration); -SPVC_PUBLIC_API 
unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id, - unsigned member_index, SpvDecoration decoration); -SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, - unsigned member_index, SpvDecoration decoration); -SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index); + SPVC_PUBLIC_API void spvc_compiler_set_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + unsigned argument); + SPVC_PUBLIC_API void spvc_compiler_set_decoration_string(spvc_compiler compiler, SpvId id, SpvDecoration decoration, + const char *argument); + SPVC_PUBLIC_API void spvc_compiler_set_name(spvc_compiler compiler, SpvId id, const char *argument); + SPVC_PUBLIC_API void spvc_compiler_set_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration, + unsigned argument); + SPVC_PUBLIC_API void spvc_compiler_set_member_decoration_string(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration, + const char *argument); + SPVC_PUBLIC_API void spvc_compiler_set_member_name(spvc_compiler compiler, spvc_type_id id, unsigned member_index, + const char *argument); + SPVC_PUBLIC_API void spvc_compiler_unset_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); + SPVC_PUBLIC_API void spvc_compiler_unset_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); + + SPVC_PUBLIC_API spvc_bool spvc_compiler_has_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); + SPVC_PUBLIC_API spvc_bool spvc_compiler_has_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); + SPVC_PUBLIC_API const char *spvc_compiler_get_name(spvc_compiler compiler, SpvId id); + SPVC_PUBLIC_API unsigned 
spvc_compiler_get_decoration(spvc_compiler compiler, SpvId id, SpvDecoration decoration); + SPVC_PUBLIC_API const char *spvc_compiler_get_decoration_string(spvc_compiler compiler, SpvId id, + SpvDecoration decoration); + SPVC_PUBLIC_API unsigned spvc_compiler_get_member_decoration(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, SpvDecoration decoration); + SPVC_PUBLIC_API const char *spvc_compiler_get_member_decoration_string(spvc_compiler compiler, spvc_type_id id, + unsigned member_index, + SpvDecoration decoration); + SPVC_PUBLIC_API const char *spvc_compiler_get_member_name(spvc_compiler compiler, spvc_type_id id, + unsigned member_index); -/* + /* * Entry points. * Maps to C++ API. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, - const spvc_entry_point **entry_points, - size_t *num_entry_points); -SPVC_PUBLIC_API spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name, - SpvExecutionModel model); -SPVC_PUBLIC_API spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name, - const char *new_name, SpvExecutionModel model); -SPVC_PUBLIC_API const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name, - SpvExecutionModel model); -SPVC_PUBLIC_API void spvc_compiler_set_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); -SPVC_PUBLIC_API void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); -SPVC_PUBLIC_API void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode, - unsigned arg0, unsigned arg1, unsigned arg2); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, const SpvExecutionMode **modes, - size_t *num_modes); -SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode); -SPVC_PUBLIC_API unsigned 
spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, - SpvExecutionMode mode, unsigned index); -SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler); -SPVC_PUBLIC_API void spvc_compiler_update_active_builtins(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_bool spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, SpvStorageClass storage); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_entry_points(spvc_compiler compiler, + const spvc_entry_point **entry_points, + size_t *num_entry_points); + SPVC_PUBLIC_API spvc_result spvc_compiler_set_entry_point(spvc_compiler compiler, const char *name, + SpvExecutionModel model); + SPVC_PUBLIC_API spvc_result spvc_compiler_rename_entry_point(spvc_compiler compiler, const char *old_name, + const char *new_name, SpvExecutionModel model); + SPVC_PUBLIC_API const char *spvc_compiler_get_cleansed_entry_point_name(spvc_compiler compiler, const char *name, + SpvExecutionModel model); + SPVC_PUBLIC_API void spvc_compiler_set_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); + SPVC_PUBLIC_API void spvc_compiler_unset_execution_mode(spvc_compiler compiler, SpvExecutionMode mode); + SPVC_PUBLIC_API void spvc_compiler_set_execution_mode_with_arguments(spvc_compiler compiler, SpvExecutionMode mode, + unsigned arg0, unsigned arg1, unsigned arg2); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_execution_modes(spvc_compiler compiler, + const SpvExecutionMode **modes, size_t *num_modes); + SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument(spvc_compiler compiler, SpvExecutionMode mode); + SPVC_PUBLIC_API unsigned spvc_compiler_get_execution_mode_argument_by_index(spvc_compiler compiler, + SpvExecutionMode mode, unsigned index); + SPVC_PUBLIC_API SpvExecutionModel spvc_compiler_get_execution_model(spvc_compiler compiler); + SPVC_PUBLIC_API void spvc_compiler_update_active_builtins(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_bool 
spvc_compiler_has_active_builtin(spvc_compiler compiler, SpvBuiltIn builtin, + SpvStorageClass storage); -/* + /* * Type query interface. * Maps to C++ API, except it's read-only. */ -SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); + SPVC_PUBLIC_API spvc_type spvc_compiler_get_type_handle(spvc_compiler compiler, spvc_type_id id); -/* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers. + /* Pulls out SPIRType::self. This effectively gives the type ID without array or pointer qualifiers. * This is necessary when reflecting decoration/name information on members of a struct, * which are placed in the base type, not the qualified type. * This is similar to spvc_reflected_resource::base_type_id. */ -SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type); - -SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); -SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); -SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); -SPVC_PUBLIC_API unsigned spvc_type_get_columns(spvc_type type); -SPVC_PUBLIC_API unsigned spvc_type_get_num_array_dimensions(spvc_type type); -SPVC_PUBLIC_API spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension); -SPVC_PUBLIC_API SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension); -SPVC_PUBLIC_API unsigned spvc_type_get_num_member_types(spvc_type type); -SPVC_PUBLIC_API spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index); -SPVC_PUBLIC_API SpvStorageClass spvc_type_get_storage_class(spvc_type type); - -/* Image type query. 
*/ -SPVC_PUBLIC_API spvc_type_id spvc_type_get_image_sampled_type(spvc_type type); -SPVC_PUBLIC_API SpvDim spvc_type_get_image_dimension(spvc_type type); -SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_depth(spvc_type type); -SPVC_PUBLIC_API spvc_bool spvc_type_get_image_arrayed(spvc_type type); -SPVC_PUBLIC_API spvc_bool spvc_type_get_image_multisampled(spvc_type type); -SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_storage(spvc_type type); -SPVC_PUBLIC_API SpvImageFormat spvc_type_get_image_storage_format(spvc_type type); -SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type); + SPVC_PUBLIC_API spvc_type_id spvc_type_get_base_type_id(spvc_type type); + + SPVC_PUBLIC_API spvc_basetype spvc_type_get_basetype(spvc_type type); + SPVC_PUBLIC_API unsigned spvc_type_get_bit_width(spvc_type type); + SPVC_PUBLIC_API unsigned spvc_type_get_vector_size(spvc_type type); + SPVC_PUBLIC_API unsigned spvc_type_get_columns(spvc_type type); + SPVC_PUBLIC_API unsigned spvc_type_get_num_array_dimensions(spvc_type type); + SPVC_PUBLIC_API spvc_bool spvc_type_array_dimension_is_literal(spvc_type type, unsigned dimension); + SPVC_PUBLIC_API SpvId spvc_type_get_array_dimension(spvc_type type, unsigned dimension); + SPVC_PUBLIC_API unsigned spvc_type_get_num_member_types(spvc_type type); + SPVC_PUBLIC_API spvc_type_id spvc_type_get_member_type(spvc_type type, unsigned index); + SPVC_PUBLIC_API SpvStorageClass spvc_type_get_storage_class(spvc_type type); + + /* Image type query. 
*/ + SPVC_PUBLIC_API spvc_type_id spvc_type_get_image_sampled_type(spvc_type type); + SPVC_PUBLIC_API SpvDim spvc_type_get_image_dimension(spvc_type type); + SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_depth(spvc_type type); + SPVC_PUBLIC_API spvc_bool spvc_type_get_image_arrayed(spvc_type type); + SPVC_PUBLIC_API spvc_bool spvc_type_get_image_multisampled(spvc_type type); + SPVC_PUBLIC_API spvc_bool spvc_type_get_image_is_storage(spvc_type type); + SPVC_PUBLIC_API SpvImageFormat spvc_type_get_image_storage_format(spvc_type type); + SPVC_PUBLIC_API SpvAccessQualifier spvc_type_get_image_access_qualifier(spvc_type type); -/* + /* * Buffer layout query. * Maps to C++ API. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, size_t *size); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, - spvc_type struct_type, size_t array_size, size_t *size); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, unsigned index, size_t *size); - -SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, - spvc_type type, unsigned index, unsigned *offset); -SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler, - spvc_type type, unsigned index, unsigned *stride); -SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler, - spvc_type type, unsigned index, unsigned *stride); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size(spvc_compiler compiler, spvc_type struct_type, + size_t *size); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_size_runtime_array(spvc_compiler compiler, + spvc_type struct_type, + size_t array_size, size_t *size); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_struct_member_size(spvc_compiler compiler, spvc_type type, + 
unsigned index, size_t *size); + + SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_offset(spvc_compiler compiler, spvc_type type, + unsigned index, unsigned *offset); + SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_array_stride(spvc_compiler compiler, spvc_type type, + unsigned index, unsigned *stride); + SPVC_PUBLIC_API spvc_result spvc_compiler_type_struct_member_matrix_stride(spvc_compiler compiler, spvc_type type, + unsigned index, unsigned *stride); -/* + /* * Workaround helper functions. * Maps to C++ API. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, spvc_variable_id *id); -SPVC_PUBLIC_API spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler, - const spvc_combined_image_sampler **samplers, - size_t *num_samplers); + SPVC_PUBLIC_API spvc_result spvc_compiler_build_dummy_sampler_for_combined_images(spvc_compiler compiler, + spvc_variable_id *id); + SPVC_PUBLIC_API spvc_result spvc_compiler_build_combined_image_samplers(spvc_compiler compiler); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_combined_image_samplers(spvc_compiler compiler, + const spvc_combined_image_sampler **samplers, + size_t *num_samplers); -/* + /* * Constants * Maps to C++ API. 
*/ -SPVC_PUBLIC_API spvc_result spvc_compiler_get_specialization_constants(spvc_compiler compiler, - const spvc_specialization_constant **constants, - size_t *num_constants); -SPVC_PUBLIC_API spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler, - spvc_constant_id id); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_specialization_constants( + spvc_compiler compiler, const spvc_specialization_constant **constants, size_t *num_constants); + SPVC_PUBLIC_API spvc_constant spvc_compiler_get_constant_handle(spvc_compiler compiler, spvc_constant_id id); -SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants(spvc_compiler compiler, - spvc_specialization_constant *x, - spvc_specialization_constant *y, - spvc_specialization_constant *z); + SPVC_PUBLIC_API spvc_constant_id spvc_compiler_get_work_group_size_specialization_constants( + spvc_compiler compiler, spvc_specialization_constant *x, spvc_specialization_constant *y, + spvc_specialization_constant *z); -/* + /* * Buffer ranges * Maps to C++ API. */ -SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler, - spvc_variable_id id, - const spvc_buffer_range **ranges, - size_t *num_ranges); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_active_buffer_ranges(spvc_compiler compiler, spvc_variable_id id, + const spvc_buffer_range **ranges, + size_t *num_ranges); -/* + /* * No stdint.h until C99, sigh :( * For smaller types, the result is sign or zero-extended as appropriate. * Maps to C++ API. * TODO: The SPIRConstant query interface and modification interface is not quite complete. 
*/ -SPVC_PUBLIC_API float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, size_t *count); -SPVC_PUBLIC_API unsigned long long spvc_constant_get_scalar_u64(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API long long spvc_constant_get_scalar_i64(spvc_constant constant, unsigned column, unsigned row); -SPVC_PUBLIC_API spvc_type_id spvc_constant_get_type(spvc_constant constant); + SPVC_PUBLIC_API float spvc_constant_get_scalar_fp16(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API float spvc_constant_get_scalar_fp32(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API double spvc_constant_get_scalar_fp64(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u32(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API int spvc_constant_get_scalar_i32(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API unsigned 
spvc_constant_get_scalar_u16(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API int spvc_constant_get_scalar_i16(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API unsigned spvc_constant_get_scalar_u8(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API int spvc_constant_get_scalar_i8(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API void spvc_constant_get_subconstants(spvc_constant constant, const spvc_constant_id **constituents, + size_t *count); + SPVC_PUBLIC_API unsigned long long spvc_constant_get_scalar_u64(spvc_constant constant, unsigned column, + unsigned row); + SPVC_PUBLIC_API long long spvc_constant_get_scalar_i64(spvc_constant constant, unsigned column, unsigned row); + SPVC_PUBLIC_API spvc_type_id spvc_constant_get_type(spvc_constant constant); -/* + /* * C implementation of the C++ api. */ -SPVC_PUBLIC_API void spvc_constant_set_scalar_fp16(spvc_constant constant, unsigned column, unsigned row, unsigned short value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_fp32(spvc_constant constant, unsigned column, unsigned row, float value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_fp64(spvc_constant constant, unsigned column, unsigned row, double value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_u32(spvc_constant constant, unsigned column, unsigned row, unsigned value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_i32(spvc_constant constant, unsigned column, unsigned row, int value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_u64(spvc_constant constant, unsigned column, unsigned row, unsigned long long value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_i64(spvc_constant constant, unsigned column, unsigned row, long long value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_u16(spvc_constant constant, unsigned column, unsigned row, unsigned short value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_i16(spvc_constant constant, unsigned column, 
unsigned row, signed short value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_u8(spvc_constant constant, unsigned column, unsigned row, unsigned char value); -SPVC_PUBLIC_API void spvc_constant_set_scalar_i8(spvc_constant constant, unsigned column, unsigned row, signed char value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_fp16(spvc_constant constant, unsigned column, unsigned row, + unsigned short value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_fp32(spvc_constant constant, unsigned column, unsigned row, + float value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_fp64(spvc_constant constant, unsigned column, unsigned row, + double value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_u32(spvc_constant constant, unsigned column, unsigned row, + unsigned value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_i32(spvc_constant constant, unsigned column, unsigned row, int value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_u64(spvc_constant constant, unsigned column, unsigned row, + unsigned long long value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_i64(spvc_constant constant, unsigned column, unsigned row, + long long value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_u16(spvc_constant constant, unsigned column, unsigned row, + unsigned short value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_i16(spvc_constant constant, unsigned column, unsigned row, + signed short value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_u8(spvc_constant constant, unsigned column, unsigned row, + unsigned char value); + SPVC_PUBLIC_API void spvc_constant_set_scalar_i8(spvc_constant constant, unsigned column, unsigned row, + signed char value); -/* + /* * Misc reflection * Maps to C++ API. 
*/ -SPVC_PUBLIC_API spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler, - spvc_variable_id id, - SpvDecoration decoration, - unsigned *word_offset); - -SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id); -SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id, - spvc_variable_id *counter_id); - -SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler, - const SpvCapability **capabilities, - size_t *num_capabilities); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions, - size_t *num_extensions); - -SPVC_PUBLIC_API const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, spvc_variable_id id); -SPVC_PUBLIC_API spvc_result spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id, - const SpvDecoration **decorations, - size_t *num_decorations); + SPVC_PUBLIC_API spvc_bool spvc_compiler_get_binary_offset_for_decoration(spvc_compiler compiler, + spvc_variable_id id, + SpvDecoration decoration, + unsigned *word_offset); + + SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_is_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id); + SPVC_PUBLIC_API spvc_bool spvc_compiler_buffer_get_hlsl_counter_buffer(spvc_compiler compiler, spvc_variable_id id, + spvc_variable_id *counter_id); + + SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_capabilities(spvc_compiler compiler, + const SpvCapability **capabilities, + size_t *num_capabilities); + SPVC_PUBLIC_API spvc_result spvc_compiler_get_declared_extensions(spvc_compiler compiler, const char ***extensions, + size_t *num_extensions); + + SPVC_PUBLIC_API const char *spvc_compiler_get_remapped_declared_block_name(spvc_compiler compiler, + spvc_variable_id id); + SPVC_PUBLIC_API spvc_result 
spvc_compiler_get_buffer_block_decorations(spvc_compiler compiler, spvc_variable_id id, + const SpvDecoration **decorations, + size_t *num_decorations); #ifdef __cplusplus } diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp new file mode 100644 index 000000000..68c447aa3 --- /dev/null +++ b/spirv_opencl.cpp @@ -0,0 +1,1067 @@ +/* + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at, or + * 2. The MIT License, found at. 
+ */ + +#include "spirv_opencl.hpp" +#include "GLSL.std.450.h" + +#include +#include +#include + +using namespace SPIRV_CROSS_SPV_HEADER_NAMESPACE; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +CompilerOpenCL::CompilerOpenCL(vector spirv_) + : CompilerGLSL(std::move(spirv_)) +{ +} + +CompilerOpenCL::CompilerOpenCL(const uint32_t *ir_, size_t word_count) + : CompilerGLSL(ir_, word_count) +{ +} + +CompilerOpenCL::CompilerOpenCL(const ParsedIR &ir_) + : CompilerGLSL(ir_) +{ +} + +CompilerOpenCL::CompilerOpenCL(ParsedIR &&ir_) + : CompilerGLSL(std::move(ir_)) +{ +} + +string CompilerOpenCL::compile() +{ + if (get_execution_model() != ExecutionModelGLCompute) + SPIRV_CROSS_THROW("OpenCL backend only supports compute shaders (ExecutionModelGLCompute)."); + + ir.fixup_reserved_names(); + + options.vulkan_semantics = true; + options.es = false; + options.version = 450; + + backend.null_pointer_literal = "NULL"; + backend.float_literal_suffix = true; + backend.double_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.int16_t_literal_suffix = "s"; + backend.uint16_t_literal_suffix = "us"; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.basic_int8_type = "char"; + backend.basic_uint8_type = "uchar"; + backend.basic_int16_type = "short"; + backend.basic_uint16_type = "ushort"; + backend.boolean_mix_function = "mix"; + backend.swizzle_is_function = false; + backend.shared_is_implied = false; + backend.use_initializer_list = true; + backend.use_typed_initializer_list = true; + backend.native_row_major_matrix = false; + backend.unsized_array_supported = false; + backend.can_declare_arrays_inline = false; + backend.allow_truncated_access_chain = true; + backend.comparison_image_samples_scalar = true; + backend.native_pointers = true; + backend.nonuniform_qualifier = ""; + backend.supports_empty_struct = true; + backend.support_64bit_switch = opencl_options.enable_64bit_atomics; + 
backend.boolean_in_struct_remapped_type = SPIRType::Boolean; + backend.discard_literal = ""; + backend.demote_literal = ""; + backend.workgroup_size_is_hidden = false; + backend.supports_extensions = true; + backend.force_gl_in_out_block = false; + backend.force_merged_mesh_block = false; + backend.array_is_value_type = false; + backend.array_is_value_type_in_buffer_blocks = false; + backend.support_pointer_to_pointer = true; + backend.implicit_c_integer_promotion_rules = true; + backend.supports_spec_constant_array_size = false; + + fixup_anonymous_struct_names(); + fixup_type_alias(); + replace_illegal_names(); + build_function_control_flow_graphs_and_analyze(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + + set_enabled_interface_variables(get_active_interface_variables()); + reorder_type_alias(); + + uint32_t pass_count = 0; + do + { + reset(pass_count); + buffer.reset(); + + emit_header(); + emit_specialization_constants_and_structs(); + emit_resources(); + emit_function(get(ir.default_entry_point), Bitset()); + + pass_count++; + } while (is_forcing_recompilation()); + + return buffer.str(); +} + +bool CompilerOpenCL::specialization_constant_is_macro(uint32_t const_id) const +{ + return constant_macro_ids.find(const_id) != constant_macro_ids.end(); +} + +void CompilerOpenCL::emit_header() +{ + statement("// Generated from SPIR-V by SPIRV-Cross (OpenCL backend)"); + statement(""); + + if (opencl_options.opencl_version >= 200) + statement("#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable"); + if (opencl_options.enable_fp64) + statement("#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); + if (opencl_options.enable_64bit_atomics && opencl_options.opencl_version >= 200) + statement("#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable"); + statement(""); + + for (auto &header : header_lines) + statement(header); + if (!header_lines.empty()) + statement(""); +} + +const char 
*CompilerOpenCL::to_storage_qualifiers_glsl(const SPIRVariable &) +{ + // OpenCL uses address space in type, not as a separate qualifier like "uniform" + return ""; +} + +void CompilerOpenCL::emit_resources() +{ + replace_illegal_names(); +} + +void CompilerOpenCL::replace_illegal_names() +{ + static const unordered_set keywords = { + "char", + "char2", + "char3", + "char4", + "char8", + "char16", + "uchar", + "uchar2", + "uchar3", + "uchar4", + "uchar8", + "uchar16", + "short", + "short2", + "short3", + "short4", + "short8", + "short16", + "ushort", + "ushort2", + "ushort3", + "ushort4", + "ushort8", + "ushort16", + "int", + "int2", + "int3", + "int4", + "int8", + "int16", + "uint", + "uint2", + "uint3", + "uint4", + "uint8", + "uint16", + "long", + "long2", + "long3", + "long4", + "long8", + "long16", + "ulong", + "ulong2", + "ulong3", + "ulong4", + "ulong8", + "ulong16", + "float", + "float2", + "float3", + "float4", + "float8", + "float16", + "double", + "double2", + "double3", + "double4", + "double8", + "double16", + "bool", + "bool2", + "bool3", + "bool4", + "bool8", + "bool16", + "half", + "half2", + "half3", + "half4", + "half8", + "half16", + "quad", + "quad2", + "quad3", + "quad4", + "quad8", + "quad16", + "complex", + "imaginary" + "__global", + "global", + "__local", + "local", + "__constant", + "constant", + "__private", + "private", + "image1d_t", + "image1d_buffer_t", + "image1d_array_t", + "image2d_t", + "image2d_array_t", + "image2d_depth_t", + "image2d_array_depth_t", + "image3d_t", + "sampler_t", + "event_t", + "clk_event_t", + "ndrange_t", + "queue_t", + "reserve_id_t", + "__kernel", + "kernel", + "__read_only", + "read_only", + "__write_only", + "write_only", + "__read_write", + "read_write", + "atomic", + "pipe", + "MAXFLOAT", + "HUGE_VALF", + "INFINITY", + "NAN", + "HUGE_VAL", + "M_E_F", + "M_LOG2E_F", + "M_LOG10E_F", + "M_LN2_F", + "M_LN10_F", + "M_PI_F", + "M_PI_2_F", + "M_PI_4_F", + "M_1_PI_F", + "M_2_PI_F", + "M_2_SQRTPI_F", + 
"M_SQRT2_F", + "M_SQRT1_2_F", + }; + + CompilerGLSL::replace_illegal_names(keywords); + CompilerGLSL::replace_illegal_names(); +} + +void CompilerOpenCL::emit_workgroup_size_attribute() +{ + auto &ep = get_entry_point(); + uint32_t x = ep.workgroup_size.x; + uint32_t y = ep.workgroup_size.y; + uint32_t z = ep.workgroup_size.z; + if (x == 0) + x = 1; + if (y == 0) + y = 1; + if (z == 0) + z = 1; + statement("__attribute__((reqd_work_group_size(", x, ", ", y, ", ", z, ")))"); +} + +void CompilerOpenCL::emit_entry_point_declarations() +{ + // Emit local variables for compute builtins so that builtin_to_glsl can return a name + if (!processing_entry_point) + return; + + auto &execution = get_entry_point(); + if (execution.model != ExecutionModelGLCompute) + return; + + bool need_workgroup_id = active_input_builtins.get(BuiltInWorkgroupId); + bool need_local_id = active_input_builtins.get(BuiltInLocalInvocationId); + bool need_global_id = active_input_builtins.get(BuiltInGlobalInvocationId); + bool need_num_workgroups = active_input_builtins.get(BuiltInNumWorkgroups); + bool need_workgroup_size = active_input_builtins.get(BuiltInWorkgroupSize); + bool need_local_invocation_index = active_input_builtins.get(BuiltInLocalInvocationIndex); + bool need_global_size = active_input_builtins.get(BuiltInGlobalSize); + + if (need_workgroup_id) + statement("uint3 spvWorkgroupId = (uint3)(get_group_id(0), get_group_id(1), get_group_id(2));"); + if (need_local_id) + statement("uint3 spvLocalInvocationId = (uint3)(get_local_id(0), get_local_id(1), get_local_id(2));"); + if (need_global_id) + statement("uint3 spvGlobalInvocationId = (uint3)(get_global_id(0), get_global_id(1), get_global_id(2));"); + if (need_num_workgroups) + statement("uint3 spvNumWorkgroups = (uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2));"); + if (need_workgroup_size) + statement("uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2));"); + if 
(need_local_invocation_index) + statement("uint spvLocalInvocationIndex = get_local_id(2) * get_local_size(0) * get_local_size(1) + " + "get_local_id(1) * get_local_size(0) + get_local_id(0);"); + if (need_global_size) + statement("uint3 spvGlobalSize = (uint3)(get_global_size(0), get_global_size(1), get_global_size(2));"); +} + +string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + (void)storage; + switch (builtin) + { + case BuiltInWorkgroupId: + return "spvWorkgroupId"; + case BuiltInLocalInvocationId: + return "spvLocalInvocationId"; + case BuiltInGlobalInvocationId: + return "spvGlobalInvocationId"; + case BuiltInNumWorkgroups: + return "spvNumWorkgroups"; + case BuiltInWorkgroupSize: + return "spvWorkgroupSize"; + case BuiltInLocalInvocationIndex: + return "spvLocalInvocationIndex"; + case BuiltInGlobalSize: + return "spvGlobalSize"; + case BuiltInNumSubgroups: + case BuiltInSubgroupId: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + SPIRV_CROSS_THROW("OpenCL subgroup builtins not yet implemented."); + default: + SPIRV_CROSS_THROW("Unsupported builtin for OpenCL compute shader."); + } +} + +bool CompilerOpenCL::builtin_translates_to_nonarray(BuiltIn builtin) const +{ + (void)builtin; + return false; +} + +// In OpenCL, address space qualifiers are required for all pointer or reference variables +string CompilerOpenCL::get_variable_address_space(const SPIRVariable &argument) +{ + const auto &type = get(argument.basetype); + return get_type_address_space(type, argument.self, true); +} + +string CompilerOpenCL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) +{ + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. 
+ Bitset flags; + auto *var = maybe_get(id); + if (var && type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + flags = get_buffer_block_flags(id); + else + { + flags = get_decoration_bitset(id); + + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + { + flags.merge_or(ir.get_buffer_block_type_flags(type)); + } + } + + const char *addr_space = ""; + switch (type.storage) + { + case StorageClassUniform: + case StorageClassStorageBuffer: + addr_space = "__global"; + break; + case StorageClassUniformConstant: + case StorageClassPushConstant: + addr_space = "__constant"; + break; + case StorageClassWorkgroup: + addr_space = "__local"; + break; + default: + // __private is default and would be redundant + break; + } + return addr_space; +} + +const char *CompilerOpenCL::to_restrict(uint32_t id, bool space) +{ + // This can be called for variable pointer contexts as well, so be very careful about which method we choose. + Bitset flags; + if (ir.ids[id].get_type() == TypeVariable) + { + uint32_t type_id = expression_type_id(id); + auto &type = expression_type(id); + if (type.basetype == SPIRType::Struct && + (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock))) + flags = get_buffer_block_flags(id); + else + flags = get_decoration_bitset(id); + } + else + flags = get_decoration_bitset(id); + + return flags.get(DecorationRestrict) || flags.get(DecorationRestrictPointerEXT) ? + (space ? "__restrict " : "__restrict") : + ""; +} + +string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) +{ + string type_name; + + // Pointer? 
+ if (is_pointer(type) || type_is_array_of_pointers(type)) + { + assert(type.pointer_depth > 0); + + const char *restrict_kw; + + auto type_address_space = get_type_address_space(type, id); + const auto *p_parent_type = &get(type.parent_type); + + // Work around C pointer qualifier rules. If glsl_type is a pointer type as well + // we'll need to emit the address space to the right. + // We could always go this route, but it makes the code unnatural. + // Prefer emitting thread T *foo over T thread* foo since it's more readable, + // but we'll have to emit thread T * thread * T constant bar; for example. + if (is_pointer(type) && is_pointer(*p_parent_type)) + type_name = join(type_to_glsl(*p_parent_type, id), " ", type_address_space, " "); + else + { + // Since this is not a pointer-to-pointer, ensure we've dug down to the base type. + // Some situations chain pointers even though they are not formally pointers-of-pointers. + while (is_pointer(*p_parent_type)) + p_parent_type = &get(p_parent_type->parent_type); + + type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id)); + } + + switch (type.basetype) + { + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + // These are handles. + break; + default: + // Anything else can be a raw pointer. + type_name += "*"; + restrict_kw = to_restrict(id, false); + if (*restrict_kw) + { + type_name += " "; + type_name += restrict_kw; + } + break; + } + return type_name; + } + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. 
+ type_name = to_name(type.self); + break; + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id, member); + + case SPIRType::Sampler: + return "sampler_t"; + + case SPIRType::Void: + return "void"; + + case SPIRType::AtomicCounter: + return "atomic_uint"; + + // Scalars + case SPIRType::Boolean: + type_name = "bool"; + break; + + case SPIRType::Char: + case SPIRType::SByte: + type_name = "char"; + break; + case SPIRType::UByte: + type_name = "uchar"; + break; + case SPIRType::Short: + type_name = "short"; + break; + case SPIRType::UShort: + type_name = "ushort"; + break; + case SPIRType::Int: + type_name = "int"; + break; + case SPIRType::UInt: + type_name = "uint"; + break; + case SPIRType::Int64: + type_name = "long"; + break; + case SPIRType::UInt64: + type_name = "ulong"; + break; + case SPIRType::Half: + type_name = "half"; + break; + case SPIRType::Float: + type_name = "float"; + break; + case SPIRType::Double: + if (!opencl_options.enable_fp64) + SPIRV_CROSS_THROW("Double requires cl_khr_fp64."); + type_name = "double"; + break; + + default: + return "unknown_type"; + } + + // Vector? + if (type.vecsize > 1) + type_name += to_string(type.vecsize); + + return type_name; +} + +string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + return type_to_glsl(type, id, false); +} + +string CompilerOpenCL::image_type_glsl(const SPIRType &type, uint32_t id, bool member) +{ + (void)id; + (void)member; + if (type.basetype != SPIRType::Image) + return ""; + + bool readonly = type.image.sampled != 2; + const char *access = readonly ? "read_only" : "write_only"; + switch (type.image.dim) + { + case Dim1D: + return join(access, " image1d_t"); + case Dim2D: + return type.image.arrayed ? 
join(access, " image2d_array_t") : join(access, " image2d_t"); + case Dim3D: + return join(access, " image3d_t"); + case DimCube: + return join(access, " image2d_t"); // OpenCL has no cube; use 2D + case DimBuffer: + return join(access, " image1d_buffer_t"); + default: + SPIRV_CROSS_THROW("Unsupported image dimension for OpenCL."); + } +} + +std::string CompilerOpenCL::type_to_glsl_constructor(const SPIRType &type) +{ + string ret = CompilerGLSL::type_to_glsl_constructor(type); + printf("type_to_glsl_constructor: %s\n", ret.c_str()); + if (!ret.empty()) + ret = join("(", ret, ")"); + return ret; +} + +// GCC workaround of lambdas calling protected funcs +std::string CompilerOpenCL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) +{ + return CompilerGLSL::variable_decl(type, name, id); +} + +std::string CompilerOpenCL::entry_point_args(bool append_comma) +{ + // Reset flattening maps for this compilation pass + flattened_buffer_vars.clear(); + push_const_member_map.clear(); + + std::string ep_args; + + struct Resource + { + SPIRVariable *var; + SPIRVariable *discrete_descriptor_alias; + string name; + SPIRType::BaseType basetype; + uint32_t index; + uint32_t plane; + uint32_t secondary_index; + }; + + SmallVector resources; + + ir.for_each_typed_id( + [&](uint32_t var_id, SPIRVariable &var) + { + auto &type = get_variable_data_type(var); + /* + if (var.storage == StorageClassPushConstant) + { + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + if (!ep_args.empty()) + ep_args += ", "; + + auto mbr_name = to_member_name(type, mbr_idx); + const auto &member_type = this->get(type.member_types[mbr_idx]); + ep_args += join(this->type_to_glsl(member_type), " ", mbr_name); + // Record the mapping so emit_instruction can rewrite access chains + push_const_member_map[var_id][mbr_idx] = mbr_name; + } + } + */ + if (var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) + { 
+ Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + + auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string + { + if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1) + { + // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V + const auto &member0_type = this->get(parent_type.member_types.front()); + return this->type_to_glsl(member0_type); + } + else + { + // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer + return this->type_to_glsl(parent_type); + } + }; + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += join("__global ", is_readonly ? "const " : "", to_structuredbuffer_subtype_name(type), "* ", + to_name(var_id)); + // Record so emit_instruction can rewrite OpAccessChain against this var + flattened_buffer_vars.insert(var_id); + } + else if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + switch (var.basetype) + { + case SPIRType::Struct: + { + break; + } + case SPIRType::Sampler: + break; + case SPIRType::Image: + { + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(type, var_id) + " " + to_name(var_id); + break; + } + case SPIRType::AccelerationStructure: + { + break; + } + default: + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += type_to_glsl(type, var_id) + " " + to_name(var_id); + break; + } + } + }); + + if (!ep_args.empty() && append_comma) + ep_args += ", "; + + return ep_args; +} + +string CompilerOpenCL::get_inner_entry_point_name() const +{ + return "comp_main"; +} + +void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + (void)return_flags; + if (func.self != ir.default_entry_point) + add_function_overload(func); + 
+ // Entry point: __kernel void name(...) + emit_workgroup_size_attribute(); + string decl; + decl += "__kernel void "; + if (func.self == ir.default_entry_point) + { + decl += get_inner_entry_point_name(); + processing_entry_point = true; + } + else + decl += to_name(func.self); + decl += "("; + + if (processing_entry_point) + { + decl += entry_point_args(!func.arguments.empty()); + + // append entry point args to avoid conflicts in local variable names. + local_variable_names.insert(resource_names.begin(), resource_names.end()); + } + + for (auto &arg : func.arguments) + { + add_local_variable_name(arg.id); + + decl += argument_decl(arg); + if (&arg != &func.arguments.back()) + decl += ", "; + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } + + decl += ")"; + statement(decl); +} + +void CompilerOpenCL::emit_specialization_constants_and_structs() +{ + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + bool emitted = false; + unordered_set declared_structs; + unordered_set aligned_structs; + + // Very particular use of the soft loop lock. + // align_struct may need to create custom types on the fly, but we don't care about + // these types for purpose of iterating over them in ir.ids_for_type and friends. + auto loop_lock = ir.create_loop_soft_lock(); + + // Physical storage buffer pointers can have cyclical references, + // so emit forward declarations of them before other structs. + // Ignore type_id because we want the underlying struct type from the pointer. 
+ ir.for_each_typed_id( + [&](uint32_t /* type_id */, const SPIRType &type) + { + if (type.basetype == SPIRType::Struct && type.pointer && + type.storage == StorageClassPhysicalStorageBuffer && declared_structs.count(type.self) == 0) + { + statement("struct ", to_name(type.self), ";"); + declared_structs.insert(type.self); + emitted = true; + } + }); + if (emitted) + statement(""); + + emitted = false; + declared_structs.clear(); + + // It is possible to have multiple spec constants that use the same spec constant ID. + // The most common cause of this is defining spec constants in GLSL while also declaring + // the workgroup size to use those spec constants. But, Metal forbids declaring more than + // one variable with the same function constant ID. + // In this case, we must only declare one variable with the [[function_constant(id)]] + // attribute, and use its initializer to initialize all the spec constants with + // that ID. + std::unordered_map unique_func_constants; + + for (auto &id_ : ir.ids_for_constant_undef_or_type) + { + auto &id = ir.ids[id_]; + + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); + + if (c.specialization) + { + auto &type = get(c.constant_type); + string sc_type_name = type_to_glsl(type); + add_resource_name(c.self); + string sc_name = to_name(c.self); + + // Specialization constants are only supported in SPIR-V not OpenCL C. + // Just declare the "default" directly. + if (has_decoration(c.self, DecorationSpecId)) + { + // Fallback to macro overrides. 
+ uint32_t constant_id = get_decoration(c.self, DecorationSpecId); + c.specialization_constant_macro_name = constant_value_macro_name(constant_id); + + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, + ";"); + + // Record the usage of macro + constant_macro_ids.insert(constant_id); + } + else + { + // Composite specialization constants must be built from other specialization constants. + statement("constant ", sc_type_name, " ", sc_name, " = ", constant_expression(c), ";"); + } + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + add_resource_name(c.self); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + // Output non-builtin interface structs. These include local function structs + // and structs nested within uniform and read-write buffers. + auto &type = id.get(); + TypeID type_id = type.self; + + bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty() && !type.pointer; + bool is_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + bool is_builtin_block = is_block && is_builtin_type(type); + bool is_declarable_struct = is_struct && !is_builtin_block; + + // Align and emit declarable structs...but avoid declaring each more than once. + if (is_declarable_struct && declared_structs.count(type_id) == 0) + { + if (emitted) + statement(""); + emitted = false; + + declared_structs.insert(type_id); + + // Make sure we declare the underlying struct type, and not the "decorated" type with pointers, etc. 
+ emit_struct(get(type_id)); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + } + } + + if (emitted) + statement(""); +} +
// Translates a single SPIR-V instruction into OpenCL C.
//
// Handled locally:
//  - Buffer atomics, lowered to the atomic_* built-ins (atomic_add, atomic_xchg, ...).
//    An image pointer operand is rejected with SPIRV_CROSS_THROW.
//  - Access chains rooted at a variable in flattened_buffer_vars (SSBO emitted as a
//    flat element pointer) or in push_const_member_map (push constants expanded to
//    scalar kernel parameters).
// Everything else is forwarded to CompilerGLSL::emit_instruction().
void CompilerOpenCL::emit_instruction(const Instruction &instruction)
{
	auto ops = stream(instruction);
	auto opcode = static_cast<Op>(instruction.op);

	// Image atomics have no lowering in this backend yet; every atomic opcode
	// checks its pointer operand through this helper first.
	auto reject_image_atomic = [this](uint32_t pointer_id) {
		if (check_atomic_image(pointer_id))
			SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL.");
	};

	// Read-modify-write atomics share one operand layout:
	// ops = { result type, result id, pointer, scope, semantics, value }.
	auto emit_rmw_atomic = [&](const char *opencl_op) {
		reject_image_atomic(ops[2]);
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], opencl_op);
	};

	switch (opcode)
	{
	case OpAtomicExchange:
		emit_rmw_atomic("atomic_xchg");
		break;

	case OpAtomicCompareExchange:
		reject_image_atomic(ops[2]);
		// SPIR-V passes (value = ops[6], comparator = ops[7]);
		// atomic_cmpxchg(ptr, expected, desired) wants the comparator first.
		emit_atomic_func_op(ops[0], ops[1], ops[2], ops[7], ops[6], "atomic_cmpxchg");
		break;

	case OpAtomicIAdd:
	case OpAtomicFAddEXT:
		// NOTE(review): atomic_add is used for the float variant as well; the OpenCL C 1.x
		// built-in is presumably integer-only, so OpAtomicFAddEXT may need its own lowering - confirm.
		emit_rmw_atomic("atomic_add");
		break;

	case OpAtomicISub:
	{
		reject_image_atomic(ops[2]);
		forced_temporaries.insert(ops[1]);
		auto sub_expr =
		    join("atomic_sub(", to_atomic_ptr_expression(ops[2]), ", ", to_enclosed_expression(ops[5]), ")");
		emit_op(ops[0], ops[1], sub_expr, should_forward(ops[2]) && should_forward(ops[5]));
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAtomicSMin:
	case OpAtomicUMin:
		emit_rmw_atomic("atomic_min");
		break;

	case OpAtomicSMax:
	case OpAtomicUMax:
		emit_rmw_atomic("atomic_max");
		break;

	case OpAtomicAnd:
		emit_rmw_atomic("atomic_and");
		break;

	case OpAtomicOr:
		emit_rmw_atomic("atomic_or");
		break;

	case OpAtomicXor:
		emit_rmw_atomic("atomic_xor");
		break;

	case OpAtomicLoad:
	{
		// Emitted as an atomic add of zero, which returns the current value unchanged.
		reject_image_atomic(ops[2]);
		bool is_unsigned = expression_type(ops[2]).basetype == SPIRType::UInt;
		forced_temporaries.insert(ops[1]);
		const char *zero = is_unsigned ? "0u" : "0";
		emit_op(ops[0], ops[1], join("atomic_add(", to_atomic_ptr_expression(ops[2]), ", ", zero, ")"), false);
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAtomicStore:
	{
		// ops = { pointer, scope, semantics, value }. Emitted as an exchange whose result is discarded.
		reject_image_atomic(ops[0]);
		statement("atomic_xchg(", to_atomic_ptr_expression(ops[0]), ", ", to_expression(ops[3]), ");");
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	{
		reject_image_atomic(ops[2]);
		forced_temporaries.insert(ops[1]);
		bool is_unsigned = expression_type(ops[2]).basetype == SPIRType::UInt;

		// Both opcodes become atomic_add with a +1 / -1 literal of matching signedness.
		const char *delta = nullptr;
		if (opcode == OpAtomicIIncrement)
			delta = is_unsigned ? "1u" : "1";
		else
			// OpenCL C only accepts C-style casts; the constructor form "uint(-1)" does not compile.
			delta = is_unsigned ? "(uint)(-1)" : "-1";

		emit_op(ops[0], ops[1], join("atomic_add(", to_atomic_ptr_expression(ops[2]), ", ", delta, ")"), false);
		flush_all_atomic_capable_variables();
		break;
	}

	case OpAccessChain:
	case OpInBoundsAccessChain:
	{
		uint32_t result_type = ops[0];
		uint32_t result_id = ops[1];
		uint32_t base_id = ops[2];
		uint32_t length = instruction.length;

		// SSBO flattened to __global T*: rewrite [base, member_0, element_idx] → base[element_idx]
		if (flattened_buffer_vars.count(base_id) && length >= 5)
		{
			// ops[3] = struct member index (always 0 for single-member SSBO) — skip
			// ops[4] = element index within the runtime array
			// NOTE(review): indices past ops[4] are not appended to the expression; presumably
			// the element is never indexed further on this path - confirm.
			auto element_expr = join(to_name(base_id), "[", to_expression(ops[4]), "]");
			auto &chain = set<SPIRExpression>(result_id, std::move(element_expr), result_type, true);
			auto *backing_var = maybe_get_backing_variable(base_id);
			chain.loaded_from = backing_var ? backing_var->self : ID(base_id);
			chain.access_chain = true;

			// Treat the chain as a forwarded expression and inherit dependencies from every operand.
			forwarded_temporaries.insert(result_id);
			suppressed_usage_tracking.insert(result_id);
			for (uint32_t i = 2; i < length; i++)
				inherit_expression_dependencies(result_id, ops[i]);
			// Without any dependencies there is nothing to track, so undo the forwarding mark.
			if (get<SPIRExpression>(result_id).expression_dependencies.empty())
				forwarded_temporaries.erase(result_id);
			break;
		}

		// Push constant expanded to scalar params: rewrite [p_var, member_idx] → scalar param name
		auto push_it = push_const_member_map.find(base_id);
		if (push_it != push_const_member_map.end() && length >= 4)
		{
			uint32_t member_index = get<SPIRConstant>(ops[3]).scalar();
			auto name_it = push_it->second.find(member_index);
			if (name_it != push_it->second.end())
			{
				auto &chain = set<SPIRExpression>(result_id, name_it->second, result_type, false);
				chain.loaded_from = base_id;
				chain.access_chain = true;
				break;
			}
		}

		// Fall through to base class for all other access chains
		CompilerGLSL::emit_instruction(instruction);
		break;
	}

	default:
		CompilerGLSL::emit_instruction(instruction);
		break;
	}
}
diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp new file mode 100644 index
000000000..da64673d8 --- /dev/null +++ b/spirv_opencl.hpp @@ -0,0 +1,125 @@ +/* + * Copyright 2016-2021 The Brenwill Workshop Ltd. + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at, or + * 2. The MIT License, found at. + */ + +#ifndef SPIRV_CROSS_OPENCL_HPP +#define SPIRV_CROSS_OPENCL_HPP + +#include "spirv_glsl.hpp" +#include +#include +#include +#include + +namespace SPIRV_CROSS_NAMESPACE +{ +using namespace SPIRV_CROSS_SPV_HEADER_NAMESPACE; + +// Decompiles SPIR-V (compute only) to OpenCL C +class CompilerOpenCL : public CompilerGLSL +{ +public: + struct Options + { + // OpenCL C version: 120 = 1.2, 200 = 2.0 + uint32_t opencl_version = make_opencl_version(1, 2); + // Enable cl_khr_fp64 (double) extension + bool enable_fp64 = false; + // Enable cl_khr_int64_extended_atomics extension + bool enable_64bit_atomics = false; + + void set_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + opencl_version = make_opencl_version(major, minor, patch); + } + + bool supports_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const + { + return opencl_version >= make_opencl_version(major, minor, patch); + } + + static uint32_t make_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) + { + return (major * 100) + (minor * 10) + 
patch; + } + }; + + explicit CompilerOpenCL(std::vector spirv_); + CompilerOpenCL(const uint32_t *ir_, size_t word_count); + explicit CompilerOpenCL(const ParsedIR &ir_); + explicit CompilerOpenCL(ParsedIR &&ir_); + + const Options &get_opencl_options() const + { + return opencl_options; + } + void set_opencl_options(const Options &opts) + { + opencl_options = opts; + } + + std::string compile() override; + + // Information about specialization constants that are translated into macros + // instead of using constant declarations. + // These must only be called after a successful call to CompilerOpenCL::compile(). + bool specialization_constant_is_macro(uint32_t constant_id) const; + +protected: + void emit_header() override; + void emit_resources(); + void emit_specialization_constants_and_structs(); + std::string type_to_glsl(const SPIRType &type, uint32_t id, bool member); + std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; + std::string builtin_to_glsl(BuiltIn builtin, StorageClass storage) override; + std::string image_type_glsl(const SPIRType &type, uint32_t id = 0, bool member = false) override; + const char *to_storage_qualifiers_glsl(const SPIRVariable &var) override; + void emit_entry_point_declarations() override; + // GCC workaround of lambdas calling protected functions (for older GCC versions) + std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; + void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; + void emit_instruction(const Instruction &instruction) override; + virtual bool builtin_translates_to_nonarray(BuiltIn builtin) const override; + std::string get_variable_address_space(const SPIRVariable &argument); + std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); + const char *to_restrict(uint32_t id, bool space); + + void replace_illegal_names() override; + + Options opencl_options; + + // SSBO 
variables emitted as flat element pointers (__global T*) in the kernel signature + std::unordered_set flattened_buffer_vars; + // Push-constant variable → { member_index → scalar param name } + std::unordered_map> push_const_member_map; + + std::unordered_set constant_macro_ids; + + void emit_workgroup_size_attribute(); + + std::string entry_point_args(bool append_comma); + std::string get_inner_entry_point_name() const; +}; + +} // namespace SPIRV_CROSS_NAMESPACE + +#endif diff --git a/test_shaders.py b/test_shaders.py index 2e019f091..b3f87fc7f 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -33,12 +33,13 @@ from functools import partial class Paths(): - def __init__(self, spirv_cross, glslang, spirv_as, spirv_val, spirv_opt): + def __init__(self, spirv_cross, glslang, spirv_as, spirv_val, spirv_opt, clang): self.spirv_cross = spirv_cross self.glslang = glslang self.spirv_as = spirv_as self.spirv_val = spirv_val self.spirv_opt = spirv_opt + self.clang = clang def remove_file(path): #print('Removing file:', path) @@ -57,7 +58,7 @@ def parse_stats(stats): m = re.search('([0-9]+) uniform registers', stats) uniform_regs = int(m.group(1)) if m else 0 - m_list = re.findall('(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)', stats) + m_list = re.findall(r'(-?[0-9]+)\s+(-?[0-9]+)\s+(-?[0-9]+)', stats) alu_short = float(m_list[1][0]) if m_list else 0 ls_short = float(m_list[1][1]) if m_list else 0 tex_short = float(m_list[1][2]) if m_list else 0 @@ -584,6 +585,104 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati return (spirv_path, hlsl_path) +def path_to_opencl_standard(shader): + if '.cl30.' in shader: + return '-cl-std=CL3.0' + elif '.cl22.' in shader: + return '-cl-std=CL2.2' + elif '.cl21.' in shader: + return '-cl-std=CL2.1' + elif '.cl20.' in shader: + return '-cl-std=CL2.0' + else: + return '-cl-std=CL1.2' + +def path_to_opencl_standard_cli(shader): + if '.cl30.' in shader: + return '300' + elif '.cl22.' 
in shader: + return '220' + elif '.cl21.' in shader: + return '210' + elif '.cl20.' in shader: + return '200' + else: + return '120' + +ignore_clang = False +def validate_shader_opencl(shader, opt, paths): + shader = reference_path(shader[0], shader[1], opt) + extensions = [] + + global ignore_clang + try: + defines = ['-D' + ext for ext in extensions] + version = path_to_opencl_standard_cli(shader) + subprocess.check_call([paths.clang, '-Xclang', + path_to_opencl_standard(shader), + '-D__OPENCL_C_VERSION__=' + version, + '-D__OPENCL_VERSION__=' + version] + defines + + [ + '-emit-llvm', '-target', 'spir64-unknown-unknown', + '-Xclang', '-finclude-default-header', '-x', 'cl', '-c', shader]) + + except OSError as oe: + if (oe.errno != errno.ENOENT): # Ignore clang not found error + raise + print('clang does not exist, ignoring further attempts to use it.') + ignore_clang = True + except subprocess.CalledProcessError: + print('Error compiling OpenCL kernel: ' + shader) + raise RuntimeError('Failed to compile OpenCL kernel') + +def cross_compile_opencl(shader, spirv, opt, iterations, paths): + spirv_path = create_temporary() + opencl_path = create_temporary(os.path.basename(shader)) + + spirv_16 = '.spv16.' in shader + spirv_14 = '.spv14.' in shader + + if spirv_16: + spirv_env = 'spv1.6' + glslang_env = 'vulkan1.3' + elif spirv_14: + spirv_env = 'vulkan1.1spv1.4' + glslang_env = 'spirv1.4' + else: + spirv_env = 'vulkan1.1' + glslang_env = 'vulkan1.1' + + spirv_cmd = [paths.spirv_as, '--preserve-numeric-ids', '--target-env', spirv_env, '-o', spirv_path, shader] + + if spirv: + subprocess.check_call(spirv_cmd) + else: + glslang_cmd = [paths.glslang, '--amb' ,'--target-env', glslang_env, '-V', '-o', spirv_path, shader] + if '.g.' in shader: + glslang_cmd.append('-g') + if '.gV.' in shader: + glslang_cmd.append('-gV') + subprocess.check_call(glslang_cmd) + + if opt and (not shader_is_invalid_spirv(shader)): + if '.graphics-robust-access.' 
in shader: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '--graphics-robust-access', '-o', spirv_path, spirv_path]) + else: + subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path]) + + spirv_cross_path = paths.spirv_cross + + opencl_args = [spirv_cross_path, '--output', opencl_path, spirv_path, '--opencl', '--iterations', str(iterations)] + opencl_args.append('--opencl-version') + opencl_args.append(path_to_opencl_standard_cli(shader)) + + subprocess.check_call(opencl_args) + + if not shader_is_invalid_spirv(opencl_path): + subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) + + return (spirv_path, opencl_path) + def cross_compile_reflect(shader, spirv, opt, iterations, paths): spirv_path = create_temporary() reflect_path = create_temporary(os.path.basename(shader)) @@ -942,6 +1041,24 @@ def test_shader_hlsl(stats, shader, args, paths): regression_check(shader, hlsl, args) remove_file(spirv) +def test_shader_opencl(stats, shader, args, paths): + joined_path = os.path.join(shader[0], shader[1]) + + if os.path.splitext(joined_path)[1] == '.cl': + return + + print('Testing OpenCL kernel:', joined_path) + is_spirv = shader_is_spirv(shader[1]) + noopt = shader_is_noopt(shader[1]) + spirv, opencl = cross_compile_opencl(joined_path, is_spirv, args.opt and (not noopt), args.iterations, paths) + regression_check(shader, opencl, args) + + skip_validation = '.invalid.' 
in joined_path + if (not args.force_no_external_validation) and (not skip_validation): + validate_shader_opencl(shader, args.opt, paths) + + remove_file(spirv) + def test_shader_reflect(stats, shader, args, paths): joined_path = os.path.join(shader[0], shader[1]) print('Testing shader reflection:', joined_path) @@ -952,12 +1069,14 @@ def test_shader_reflect(stats, shader, args, paths): remove_file(spirv) def test_shader_file(relpath, stats, args, backend): - paths = Paths(args.spirv_cross, args.glslang, args.spirv_as, args.spirv_val, args.spirv_opt) + paths = Paths(args.spirv_cross, args.glslang, args.spirv_as, args.spirv_val, args.spirv_opt, args.clang) try: if backend == 'msl': test_shader_msl(stats, (args.folder, relpath), args, paths) elif backend == 'hlsl': test_shader_hlsl(stats, (args.folder, relpath), args, paths) + elif backend == 'opencl': + test_shader_opencl(stats, (args.folder, relpath), args, paths) elif backend == 'reflect': test_shader_reflect(stats, (args.folder, relpath), args, paths) else: @@ -1033,6 +1152,9 @@ def main(): parser.add_argument('--hlsl', action = 'store_true', help = 'Test HLSL backend.') + parser.add_argument('--opencl', + action = 'store_true', + help = 'Test OpenCL backend.') parser.add_argument('--force-no-external-validation', action = 'store_true', help = 'Disable all external validation.') @@ -1060,6 +1182,9 @@ def main(): parser.add_argument('--spirv-opt', default = 'spirv-opt', help = 'Explicit path to spirv-opt') + parser.add_argument('--clang', + default = 'clang', + help = 'Explicit path to clang') parser.add_argument('--iterations', default = 1, type = int, @@ -1082,6 +1207,8 @@ def main(): backend = 'msl' elif args.hlsl: backend = 'hlsl' + elif args.opencl: + backend = 'opencl' elif args.reflect: backend = 'reflect' From 5c943a9383d49bf6c3fd15b4c7fabe474cee6efd Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Thu, 12 Mar 2026 18:17:18 -0700 Subject: [PATCH 02/16] Adding shaders-opencl with more OpenCL backend 
support --- Package.swift | 2 + main.cpp | 6 + ...rent-structured-buffer.structured.asm.frag | 42 +- ...rent-structured-buffer.structured.asm.frag | 42 +- .../asm/comp/atomic-decrement.asm.comp | 19 + .../asm/comp/atomic-increment.asm.comp | 19 + .../asm/comp/bitcast_iadd.asm.comp | 32 + .../asm/comp/bitcast_icmp.asm.comp | 32 + .../asm/comp/bitcast_sar.asm.comp | 34 + .../asm/comp/bitcast_sdiv.asm.comp | 34 + .../asm/comp/bitcast_slr.asm.comp | 34 + .../asm/comp/block-name-alias-global.asm.comp | 48 + .../comp/buffer-write-relative-addr.asm.comp | 24 + .../asm/comp/buffer-write.asm.comp | 16 + .../comp/copy-object-ssbo-to-ssbo.asm.comp | 24 + .../asm/comp/copy-object-ubo-to-ssbo.asm.comp | 24 + .../asm/comp/duplicate-spec-id.asm.comp | 26 + .../asm/comp/fma.spv16.asm.comp | 23 + .../comp/global-parameter-name-alias.asm.comp | 30 + ...e-load-store-short-vector.invalid.asm.comp | 18 + ...p-spec-constant-op-vector-related.asm.comp | 78 + .../shaders-opencl/asm/comp/quantize.asm.comp | 35 + .../asm/comp/relaxed-block-layout.asm.comp | 23 + .../comp/replicated-composites.spv16.asm.comp | 28 + ...specialization-constant-workgroup.asm.comp | 26 + .../struct-resource-name-aliasing.asm.comp | 22 + .../asm/comp/uint_smulextended.asm.comp | 28 + .../undefined-constant-composite.asm.comp | 40 + ...undefined-spec-constant-composite.asm.comp | 46 + .../asm/comp/variable-pointers-2.asm.comp | 68 + ...ariable-pointers-store-forwarding.asm.comp | 35 + .../vector-builtin-type-cast-func.asm.comp | 34 + .../comp/vector-builtin-type-cast.asm.comp | 24 + .../access-private-workgroup-in-function.comp | 36 + reference/shaders-opencl/comp/arguments.comp | 25 + reference/shaders-opencl/comp/atomic.comp | 53 + reference/shaders-opencl/comp/barriers.comp | 80 + reference/shaders-opencl/comp/basic.comp | 37 + .../comp/basic.dispatchbase.comp | 43 + .../comp/buffer-push-const.comp | 25 + reference/shaders-opencl/comp/builtins.comp | 15 + .../comp/cfg-preserve-parameter.comp | 75 + 
.../comp/complex-type-alias.comp | 56 + .../comp/composite-construct.comp | 37 + reference/shaders-opencl/comp/culling.comp | 37 + .../shaders-opencl/comp/defer-parens.comp | 25 + reference/shaders-opencl/comp/dowhile.comp | 34 + .../shaders-opencl/comp/expect-assume.comp | 17 + .../comp/force-recompile-hooks.swizzle.comp | 12 + reference/shaders-opencl/comp/functions.comp | 15 + .../comp/global-invocation-id.comp | 18 + reference/shaders-opencl/comp/image.comp | 11 + reference/shaders-opencl/comp/insert.comp | 23 + .../comp/local-invocation-id.comp | 18 + .../comp/local-invocation-index.comp | 18 + .../comp/local-size-duplicate-spec-id.comp | 42 + reference/shaders-opencl/comp/mod.comp | 30 + reference/shaders-opencl/comp/modf.comp | 37 + .../shaders-opencl/comp/outer-product.comp | 42 + .../shaders-opencl/comp/packing-test-1.comp | 36 + .../shaders-opencl/comp/packing-test-2.comp | 33 + .../shaders-opencl/comp/read-write-only.comp | 35 + reference/shaders-opencl/comp/rmw-opt.comp | 31 + ...alar-std450-distance-length-normalize.comp | 25 + .../comp/shared-std450.double.comp | 31 + .../comp/shared-struct-bool-cast.comp | 106 + .../comp/shared-zero-init-simple.comp | 27 + .../shaders-opencl/comp/shared-zero-init.comp | 32 + reference/shaders-opencl/comp/shared.comp | 30 + .../comp/spec-constant-work-group-size.comp | 39 + .../shaders-opencl/comp/struct-layout.comp | 32 + .../shaders-opencl/comp/struct-nested.comp | 33 + .../comp/struct-packing.invalid.comp | 0 .../shaders-opencl/comp/torture-loop.comp | 55 + reference/shaders-opencl/comp/type-alias.comp | 61 + reference/shaders-opencl/comp/udiv.comp | 24 + .../shaders-opencl/comp/writable-ssbo.comp | 18 + .../asm/comp/atomic-decrement.asm.comp | 71 + .../asm/comp/atomic-increment.asm.comp | 71 + shaders-opencl/asm/comp/bitcast_iadd.asm.comp | 79 + shaders-opencl/asm/comp/bitcast_icmp.asm.comp | 101 + shaders-opencl/asm/comp/bitcast_sar.asm.comp | 77 + shaders-opencl/asm/comp/bitcast_sdiv.asm.comp | 77 + 
shaders-opencl/asm/comp/bitcast_slr.asm.comp | 77 + .../asm/comp/block-name-alias-global.asm.comp | 119 ++ .../comp/buffer-write-relative-addr.asm.comp | 93 + shaders-opencl/asm/comp/buffer-write.asm.comp | 59 + .../comp/copy-object-ssbo-to-ssbo.asm.comp | 43 + .../asm/comp/copy-object-ubo-to-ssbo.asm.comp | 43 + .../asm/comp/duplicate-spec-id.asm.comp | 54 + shaders-opencl/asm/comp/fma.spv16.asm.comp | 65 + .../comp/global-parameter-name-alias.asm.comp | 102 + ...e-load-store-short-vector.invalid.asm.comp | 75 + ...p-spec-constant-op-vector-related.asm.comp | 107 + shaders-opencl/asm/comp/quantize.asm.comp | 67 + .../asm/comp/relaxed-block-layout.asm.comp | 108 + .../comp/replicated-composites.spv16.asm.comp | 81 + ...specialization-constant-workgroup.asm.comp | 47 + .../struct-resource-name-aliasing.asm.comp | 49 + .../asm/comp/uint_smulextended.asm.comp | 61 + .../undefined-constant-composite.asm.comp | 102 + ...undefined-spec-constant-composite.asm.comp | 122 ++ .../asm/comp/variable-pointers-2.asm.comp | 117 + ...ariable-pointers-store-forwarding.asm.comp | 75 + .../vector-builtin-type-cast-func.asm.comp | 147 ++ .../comp/vector-builtin-type-cast.asm.comp | 128 ++ .../access-private-workgroup-in-function.comp | 31 + shaders-opencl/comp/arguments.comp | 13 + shaders-opencl/comp/atomic.comp | 56 + shaders-opencl/comp/barriers.comp | 79 + shaders-opencl/comp/basic.comp | 28 + shaders-opencl/comp/basic.dispatchbase.comp | 29 + shaders-opencl/comp/buffer-push-const.comp | 9 + shaders-opencl/comp/builtins.comp | 12 + .../comp/cfg-preserve-parameter.comp | 54 + shaders-opencl/comp/complex-type-alias.comp | 41 + shaders-opencl/comp/composite-construct.comp | 31 + shaders-opencl/comp/culling.comp | 26 + shaders-opencl/comp/defer-parens.comp | 30 + shaders-opencl/comp/dowhile.comp | 31 + shaders-opencl/comp/expect-assume.comp | 19 + .../comp/force-recompile-hooks.swizzle.comp | 9 + shaders-opencl/comp/functions.comp | 12 + shaders-opencl/comp/global-invocation-id.comp | 
9 + shaders-opencl/comp/image.comp | 12 + shaders-opencl/comp/insert.comp | 18 + shaders-opencl/comp/local-invocation-id.comp | 9 + .../comp/local-invocation-index.comp | 9 + .../comp/local-size-duplicate-spec-id.comp | 15 + shaders-opencl/comp/mod.comp | 26 + shaders-opencl/comp/modf.comp | 23 + shaders-opencl/comp/outer-product.comp | 37 + shaders-opencl/comp/packing-test-1.comp | 18 + shaders-opencl/comp/packing-test-2.comp | 16 + shaders-opencl/comp/read-write-only.comp | 26 + shaders-opencl/comp/rmw-opt.comp | 27 + ...alar-std450-distance-length-normalize.comp | 20 + shaders-opencl/comp/shared-std450.double.comp | 27 + .../comp/shared-struct-bool-cast.comp | 35 + .../comp/shared-zero-init-simple.comp | 24 + shaders-opencl/comp/shared-zero-init.comp | 28 + shaders-opencl/comp/shared.comp | 27 + .../comp/spec-constant-work-group-size.comp | 17 + shaders-opencl/comp/struct-layout.comp | 24 + shaders-opencl/comp/struct-nested.comp | 20 + .../comp/struct-packing.invalid.comp | 77 + shaders-opencl/comp/torture-loop.comp | 40 + shaders-opencl/comp/type-alias.comp | 45 + shaders-opencl/comp/udiv.comp | 17 + shaders-opencl/comp/writable-ssbo.comp | 9 + spirv_glsl.hpp | 64 +- spirv_opencl.cpp | 1874 +++++++++++++++-- spirv_opencl.hpp | 55 + test_shaders.py | 19 +- test_shaders.sh | 2 + 155 files changed, 7911 insertions(+), 210 deletions(-) create mode 100644 reference/shaders-opencl/asm/comp/atomic-decrement.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/atomic-increment.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/bitcast_iadd.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/bitcast_sar.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/bitcast_slr.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/block-name-alias-global.asm.comp create mode 100644 
reference/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/buffer-write.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/fma.spv16.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/quantize.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/uint_smulextended.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp create mode 100644 reference/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp create mode 100644 reference/shaders-opencl/comp/access-private-workgroup-in-function.comp create mode 100644 reference/shaders-opencl/comp/arguments.comp create mode 100644 
reference/shaders-opencl/comp/atomic.comp create mode 100644 reference/shaders-opencl/comp/barriers.comp create mode 100644 reference/shaders-opencl/comp/basic.comp create mode 100644 reference/shaders-opencl/comp/basic.dispatchbase.comp create mode 100644 reference/shaders-opencl/comp/buffer-push-const.comp create mode 100644 reference/shaders-opencl/comp/builtins.comp create mode 100644 reference/shaders-opencl/comp/cfg-preserve-parameter.comp create mode 100644 reference/shaders-opencl/comp/complex-type-alias.comp create mode 100644 reference/shaders-opencl/comp/composite-construct.comp create mode 100644 reference/shaders-opencl/comp/culling.comp create mode 100644 reference/shaders-opencl/comp/defer-parens.comp create mode 100644 reference/shaders-opencl/comp/dowhile.comp create mode 100644 reference/shaders-opencl/comp/expect-assume.comp create mode 100644 reference/shaders-opencl/comp/force-recompile-hooks.swizzle.comp create mode 100644 reference/shaders-opencl/comp/functions.comp create mode 100644 reference/shaders-opencl/comp/global-invocation-id.comp create mode 100644 reference/shaders-opencl/comp/image.comp create mode 100644 reference/shaders-opencl/comp/insert.comp create mode 100644 reference/shaders-opencl/comp/local-invocation-id.comp create mode 100644 reference/shaders-opencl/comp/local-invocation-index.comp create mode 100644 reference/shaders-opencl/comp/local-size-duplicate-spec-id.comp create mode 100644 reference/shaders-opencl/comp/mod.comp create mode 100644 reference/shaders-opencl/comp/modf.comp create mode 100644 reference/shaders-opencl/comp/outer-product.comp create mode 100644 reference/shaders-opencl/comp/packing-test-1.comp create mode 100644 reference/shaders-opencl/comp/packing-test-2.comp create mode 100644 reference/shaders-opencl/comp/read-write-only.comp create mode 100644 reference/shaders-opencl/comp/rmw-opt.comp create mode 100644 reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp create mode 
100644 reference/shaders-opencl/comp/shared-std450.double.comp create mode 100644 reference/shaders-opencl/comp/shared-struct-bool-cast.comp create mode 100644 reference/shaders-opencl/comp/shared-zero-init-simple.comp create mode 100644 reference/shaders-opencl/comp/shared-zero-init.comp create mode 100644 reference/shaders-opencl/comp/shared.comp create mode 100644 reference/shaders-opencl/comp/spec-constant-work-group-size.comp create mode 100644 reference/shaders-opencl/comp/struct-layout.comp create mode 100644 reference/shaders-opencl/comp/struct-nested.comp create mode 100644 reference/shaders-opencl/comp/struct-packing.invalid.comp create mode 100644 reference/shaders-opencl/comp/torture-loop.comp create mode 100644 reference/shaders-opencl/comp/type-alias.comp create mode 100644 reference/shaders-opencl/comp/udiv.comp create mode 100644 reference/shaders-opencl/comp/writable-ssbo.comp create mode 100644 shaders-opencl/asm/comp/atomic-decrement.asm.comp create mode 100644 shaders-opencl/asm/comp/atomic-increment.asm.comp create mode 100644 shaders-opencl/asm/comp/bitcast_iadd.asm.comp create mode 100644 shaders-opencl/asm/comp/bitcast_icmp.asm.comp create mode 100644 shaders-opencl/asm/comp/bitcast_sar.asm.comp create mode 100644 shaders-opencl/asm/comp/bitcast_sdiv.asm.comp create mode 100644 shaders-opencl/asm/comp/bitcast_slr.asm.comp create mode 100644 shaders-opencl/asm/comp/block-name-alias-global.asm.comp create mode 100644 shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp create mode 100644 shaders-opencl/asm/comp/buffer-write.asm.comp create mode 100644 shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp create mode 100644 shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp create mode 100644 shaders-opencl/asm/comp/duplicate-spec-id.asm.comp create mode 100644 shaders-opencl/asm/comp/fma.spv16.asm.comp create mode 100644 shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp create mode 100644 
shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp create mode 100644 shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp create mode 100644 shaders-opencl/asm/comp/quantize.asm.comp create mode 100644 shaders-opencl/asm/comp/relaxed-block-layout.asm.comp create mode 100644 shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp create mode 100644 shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp create mode 100644 shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp create mode 100644 shaders-opencl/asm/comp/uint_smulextended.asm.comp create mode 100644 shaders-opencl/asm/comp/undefined-constant-composite.asm.comp create mode 100644 shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp create mode 100644 shaders-opencl/asm/comp/variable-pointers-2.asm.comp create mode 100644 shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp create mode 100644 shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp create mode 100644 shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp create mode 100644 shaders-opencl/comp/access-private-workgroup-in-function.comp create mode 100644 shaders-opencl/comp/arguments.comp create mode 100644 shaders-opencl/comp/atomic.comp create mode 100644 shaders-opencl/comp/barriers.comp create mode 100644 shaders-opencl/comp/basic.comp create mode 100644 shaders-opencl/comp/basic.dispatchbase.comp create mode 100644 shaders-opencl/comp/buffer-push-const.comp create mode 100644 shaders-opencl/comp/builtins.comp create mode 100644 shaders-opencl/comp/cfg-preserve-parameter.comp create mode 100644 shaders-opencl/comp/complex-type-alias.comp create mode 100644 shaders-opencl/comp/composite-construct.comp create mode 100644 shaders-opencl/comp/culling.comp create mode 100644 shaders-opencl/comp/defer-parens.comp create mode 100644 shaders-opencl/comp/dowhile.comp create mode 100644 shaders-opencl/comp/expect-assume.comp create mode 100644 
shaders-opencl/comp/force-recompile-hooks.swizzle.comp create mode 100644 shaders-opencl/comp/functions.comp create mode 100644 shaders-opencl/comp/global-invocation-id.comp create mode 100644 shaders-opencl/comp/image.comp create mode 100644 shaders-opencl/comp/insert.comp create mode 100644 shaders-opencl/comp/local-invocation-id.comp create mode 100644 shaders-opencl/comp/local-invocation-index.comp create mode 100644 shaders-opencl/comp/local-size-duplicate-spec-id.comp create mode 100644 shaders-opencl/comp/mod.comp create mode 100644 shaders-opencl/comp/modf.comp create mode 100644 shaders-opencl/comp/outer-product.comp create mode 100644 shaders-opencl/comp/packing-test-1.comp create mode 100644 shaders-opencl/comp/packing-test-2.comp create mode 100644 shaders-opencl/comp/read-write-only.comp create mode 100644 shaders-opencl/comp/rmw-opt.comp create mode 100644 shaders-opencl/comp/scalar-std450-distance-length-normalize.comp create mode 100644 shaders-opencl/comp/shared-std450.double.comp create mode 100644 shaders-opencl/comp/shared-struct-bool-cast.comp create mode 100644 shaders-opencl/comp/shared-zero-init-simple.comp create mode 100644 shaders-opencl/comp/shared-zero-init.comp create mode 100644 shaders-opencl/comp/shared.comp create mode 100644 shaders-opencl/comp/spec-constant-work-group-size.comp create mode 100644 shaders-opencl/comp/struct-layout.comp create mode 100644 shaders-opencl/comp/struct-nested.comp create mode 100644 shaders-opencl/comp/struct-packing.invalid.comp create mode 100644 shaders-opencl/comp/torture-loop.comp create mode 100644 shaders-opencl/comp/type-alias.comp create mode 100644 shaders-opencl/comp/udiv.comp create mode 100644 shaders-opencl/comp/writable-ssbo.comp diff --git a/Package.swift b/Package.swift index 99e601936..2ab54c018 100644 --- a/Package.swift +++ b/Package.swift @@ -47,6 +47,8 @@ let package = Package( "shaders-msl", "shaders-msl-no-opt", "shaders-no-opt", + "shaders-opencl", + "shaders-opencl-no-opt", 
"shaders-other", "shaders-reflection", "shaders-ue4", diff --git a/main.cpp b/main.cpp index adcfccbdd..2fc6ced5c 100644 --- a/main.cpp +++ b/main.cpp @@ -780,6 +780,8 @@ struct CLIArguments uint32_t opencl_version = 120; bool opencl_enable_fp64 = false; bool opencl_enable_64bit_atomics = false; + bool opencl_enable_subgroups = false; + bool opencl_enable_shuffle = false; }; static void print_version() @@ -1361,6 +1363,8 @@ static string compile_iteration(const CLIArguments &args, std::vector CompilerOpenCL::Options ocl_opts = ocl_comp->get_opencl_options(); ocl_opts.opencl_version = args.opencl_version; ocl_opts.enable_fp64 = args.opencl_enable_fp64; + ocl_opts.enable_subgroups = args.opencl_enable_subgroups; + ocl_opts.enable_shuffle = args.opencl_enable_shuffle; ocl_comp->set_opencl_options(ocl_opts); } else if (args.hlsl) @@ -1993,6 +1997,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--opencl-version", [&args](CLIParser &parser) { args.opencl_version = parser.next_uint(); }); cbs.add("--opencl-fp64", [&args](CLIParser &) { args.opencl_enable_fp64 = true; }); cbs.add("--opencl-64bit-atomics", [&args](CLIParser &) { args.opencl_enable_64bit_atomics = true; }); + cbs.add("--opencl-subgroups", [&args](CLIParser &) { args.opencl_enable_subgroups = true; }); + cbs.add("--opencl-shuffle", [&args](CLIParser &) { args.opencl_enable_shuffle = true; }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) diff --git a/reference/opt/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag b/reference/opt/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag index 996d5f6fb..23994db3a 100644 --- a/reference/opt/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag +++ b/reference/opt/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag @@ -1,21 +1,21 @@ 
-globallycoherent RWStructuredBuffer TestBuffer : register(u0); - -static float4 out_var_SV_Target0; - -struct SPIRV_Cross_Output -{ - float4 out_var_SV_Target0 : SV_Target0; -}; - -void frag_main() -{ - out_var_SV_Target0 = TestBuffer[0u]; -} - -SPIRV_Cross_Output main() -{ - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.out_var_SV_Target0 = out_var_SV_Target0; - return stage_output; -} +globallycoherent RWStructuredBuffer TestBuffer : register(u0); + +static float4 out_var_SV_Target0; + +struct SPIRV_Cross_Output +{ + float4 out_var_SV_Target0 : SV_Target0; +}; + +void frag_main() +{ + out_var_SV_Target0 = TestBuffer[0u]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.out_var_SV_Target0 = out_var_SV_Target0; + return stage_output; +} diff --git a/reference/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag b/reference/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag index 996d5f6fb..23994db3a 100644 --- a/reference/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag +++ b/reference/shaders-hlsl/asm/frag/globally-coherent-structured-buffer.structured.asm.frag @@ -1,21 +1,21 @@ -globallycoherent RWStructuredBuffer TestBuffer : register(u0); - -static float4 out_var_SV_Target0; - -struct SPIRV_Cross_Output -{ - float4 out_var_SV_Target0 : SV_Target0; -}; - -void frag_main() -{ - out_var_SV_Target0 = TestBuffer[0u]; -} - -SPIRV_Cross_Output main() -{ - frag_main(); - SPIRV_Cross_Output stage_output; - stage_output.out_var_SV_Target0 = out_var_SV_Target0; - return stage_output; -} +globallycoherent RWStructuredBuffer TestBuffer : register(u0); + +static float4 out_var_SV_Target0; + +struct SPIRV_Cross_Output +{ + float4 out_var_SV_Target0 : SV_Target0; +}; + +void frag_main() +{ + out_var_SV_Target0 = TestBuffer[0u]; +} + +SPIRV_Cross_Output main() +{ + frag_main(); + SPIRV_Cross_Output stage_output; + 
stage_output.out_var_SV_Target0 = out_var_SV_Target0; + return stage_output; +} diff --git a/reference/shaders-opencl/asm/comp/atomic-decrement.asm.comp b/reference/shaders-opencl/asm/comp/atomic-decrement.asm.comp new file mode 100644 index 000000000..36a844495 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/atomic-decrement.asm.comp @@ -0,0 +1,19 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct u0_counters +{ + uint c; +}; + +typedef struct u0_counters u0_counters; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(write_only image1d_buffer_t u0, __global uint* u0_counter) +{ + uint _24 = atomic_add(&(u0_counter[0]), (uint)(-1)); + float4 r0; + r0.x = as_float(_24); + write_imageui(u0, as_int((as_uint(as_int(r0.x)) * 1u) + (as_uint(0) >> 2u)), (uint4)(as_uint(as_int(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x)))); +} + diff --git a/reference/shaders-opencl/asm/comp/atomic-increment.asm.comp b/reference/shaders-opencl/asm/comp/atomic-increment.asm.comp new file mode 100644 index 000000000..4c9563240 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/atomic-increment.asm.comp @@ -0,0 +1,19 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct u0_counters +{ + uint c; +}; + +typedef struct u0_counters u0_counters; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(write_only image1d_buffer_t u0, __global uint* u0_counter) +{ + uint _24 = atomic_add(&(u0_counter[0]), 1u); + float4 r0; + r0.x = as_float(_24); + write_imageui(u0, as_int((as_uint(as_int(r0.x)) * 1u) + (as_uint(0) >> 2u)), (uint4)(as_uint(as_int(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x)))); +} + diff --git a/reference/shaders-opencl/asm/comp/bitcast_iadd.asm.comp b/reference/shaders-opencl/asm/comp/bitcast_iadd.asm.comp new file mode 100644 index 000000000..5c0520b3a --- /dev/null +++ b/reference/shaders-opencl/asm/comp/bitcast_iadd.asm.comp @@ -0,0 +1,32 @@ 
+// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + _6->_m0 = _5->_m1 + as_uint4(_5->_m0); + _6->_m0 = as_uint4(_5->_m0) + _5->_m1; + _6->_m0 = _5->_m1 + _5->_m1; + _6->_m0 = as_uint4(_5->_m0 + _5->_m0); + _6->_m1 = as_int4(_5->_m1 + _5->_m1); + _6->_m1 = _5->_m0 + _5->_m0; + _6->_m1 = as_int4(_5->_m1) + _5->_m0; + _6->_m1 = _5->_m0 + as_int4(_5->_m1); +} + diff --git a/reference/shaders-opencl/asm/comp/bitcast_icmp.asm.comp b/reference/shaders-opencl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 000000000..c2195a52c --- /dev/null +++ b/reference/shaders-opencl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) < _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) <= _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 < as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 <= as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) > _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) >= _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 > as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 >= as_uint4(_5->_m0)); +} + diff --git a/reference/shaders-opencl/asm/comp/bitcast_sar.asm.comp b/reference/shaders-opencl/asm/comp/bitcast_sar.asm.comp new file mode 100644 index 000000000..93916384b --- /dev/null +++ 
b/reference/shaders-opencl/asm/comp/bitcast_sar.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = as_uint4(as_int4(_23) >> _22); + _6->_m0 = as_uint4(_22 >> as_int4(_23)); + _6->_m0 = as_uint4(as_int4(_23) >> as_int4(_23)); + _6->_m0 = as_uint4(_22 >> _22); + _6->_m1 = as_int4(_23) >> as_int4(_23); + _6->_m1 = _22 >> _22; + _6->_m1 = as_int4(_23) >> _22; + _6->_m1 = _22 >> as_int4(_23); +} + diff --git a/reference/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp b/reference/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp new file mode 100644 index 000000000..f5a1a3a67 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = as_uint4(as_int4(_23) / _22); + _6->_m0 = as_uint4(_22 / as_int4(_23)); + _6->_m0 = as_uint4(as_int4(_23) / as_int4(_23)); + _6->_m0 = as_uint4(_22 / _22); + _6->_m1 = as_int4(_23) / as_int4(_23); + _6->_m1 = _22 / _22; + _6->_m1 = as_int4(_23) / _22; + _6->_m1 = _22 / as_int4(_23); +} + diff --git a/reference/shaders-opencl/asm/comp/bitcast_slr.asm.comp b/reference/shaders-opencl/asm/comp/bitcast_slr.asm.comp new file mode 100644 index 000000000..525761cc2 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/bitcast_slr.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + 
+struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = _23 >> as_uint4(_22); + _6->_m0 = as_uint4(_22) >> _23; + _6->_m0 = _23 >> _23; + _6->_m0 = as_uint4(_22) >> as_uint4(_22); + _6->_m1 = as_int4(_23 >> _23); + _6->_m1 = as_int4(as_uint4(_22) >> as_uint4(_22)); + _6->_m1 = as_int4(_23 >> as_uint4(_22)); + _6->_m1 = as_int4(as_uint4(_22) >> _23); +} + diff --git a/reference/shaders-opencl/asm/comp/block-name-alias-global.asm.comp b/reference/shaders-opencl/asm/comp/block-name-alias-global.asm.comp new file mode 100644 index 000000000..166f01b62 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/block-name-alias-global.asm.comp @@ -0,0 +1,48 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct A +{ + int a; + int b; +}; + +typedef struct A A; + +struct A_1 +{ + A Data[1]; +}; + +typedef struct A_1 A_1; + +struct A_2 +{ + A Data[1024]; +}; + +typedef struct A_2 A_2; + +struct B +{ + A Data[1]; +}; + +typedef struct B B; + +struct B_1 +{ + A Data[1024]; +}; + +typedef struct B_1 B_1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global A* C1, A_2 C2, __global A* C3, B_1 C4) +{ + C1[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a = C2.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + C1[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b = C2.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b; + C3[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a = C4.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + C3[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b = C4.Data[((uint3)(get_global_id(0), get_global_id(1), 
get_global_id(2))).x].b; +} + diff --git a/reference/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp new file mode 100644 index 000000000..d69fb8a51 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb5_struct +{ + float4 _m0[5]; +}; + +typedef struct cb5_struct cb5_struct; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(cb5_struct cb0_5, write_only image1d_buffer_t u0) +{ + float4 r0_1; + r0_1.x = as_float(as_int(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x) << 4); + r0_1.y = as_float(as_int(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x)); + uint _41 = as_uint(r0_1.x) >> 2u; + uint4 _50 = as_uint4(cb0_5._m0[as_uint(as_int(r0_1.y)) + 1u]); + write_imageui(u0, as_int(_41), _50.xxxx); + write_imageui(u0, as_int(_41 + 1u), _50.yyyy); + write_imageui(u0, as_int(_41 + 2u), _50.zzzz); + write_imageui(u0, as_int(_41 + 3u), _50.wwww); +} + diff --git a/reference/shaders-opencl/asm/comp/buffer-write.asm.comp b/reference/shaders-opencl/asm/comp/buffer-write.asm.comp new file mode 100644 index 000000000..ce88fd4e3 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/buffer-write.asm.comp @@ -0,0 +1,16 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb +{ + float value; +}; + +typedef struct cb cb; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(cb _8, write_only image1d_buffer_t _buffer) +{ + write_imagef(_buffer, as_int((32u * ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x) + ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))), (float4)(_8.value)); +} + diff --git a/reference/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp 
b/reference/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 000000000..8da6f6cfa --- /dev/null +++ b/reference/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _19 +{ +}; +typedef struct _19 _19; + +struct _5 +{ + int _m0; + _19 _m1; + _19 _m2; + int _m3; +}; + +typedef struct _5 _5; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _5* _3, __global _5* _4) +{ + *_4 = (*_3); +} + diff --git a/reference/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 000000000..3ab995c11 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _19 +{ +}; +typedef struct _19 _19; + +struct _5 +{ + int _m0; + _19 _m1; + _19 _m2; + int _m3; +}; + +typedef struct _5 _5; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(_5 _3, __global _5* _4) +{ + *_4 = _3; +} + diff --git a/reference/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp b/reference/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp new file mode 100644 index 000000000..177a60dc6 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct StorageBuffer +{ + float values[1]; +}; + +typedef struct StorageBuffer StorageBuffer; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1 +#endif +constant int foo = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 2.0f +#endif +constant float bar = SPIRV_CROSS_CONSTANT_ID_0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float* ssbo) +{ + uint3 spvWorkgroupSize = 
(uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + ssbo[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = convert_float(foo) + bar; +} + diff --git a/reference/shaders-opencl/asm/comp/fma.spv16.asm.comp b/reference/shaders-opencl/asm/comp/fma.spv16.asm.comp new file mode 100644 index 000000000..9343d7f25 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/fma.spv16.asm.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _4, __global const float4* _6) +{ + _4[0] = fma(_6[0], _6[1], _6[1]); +} + diff --git a/reference/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp b/reference/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp new file mode 100644 index 000000000..f3caf7d90 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct ssbo +{ + uint _data[1]; +}; + +typedef struct ssbo ssbo; + +void Load( uint* size, __global const uint* ssbo) +{ + int byteAddrTemp = as_int((*size) >> as_uint(2)); + uint4 data = (uint4)(ssbo[byteAddrTemp], ssbo[byteAddrTemp + 1], ssbo[byteAddrTemp + 2], ssbo[byteAddrTemp + 3]); +} + +void _main( uint3* id, __global const uint* ssbo) +{ + uint param = 4u; + Load(¶m, ssbo); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const uint* ssbo) +{ + uint3 id_1 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + uint3 param_1 = id_1; + _main(¶m_1, ssbo); +} + diff --git a/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp 
b/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp new file mode 100644 index 000000000..f7d65805e --- /dev/null +++ b/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void _main( uint3* id) +{ + float2 loaded = read_imagef(TargetTexture, as_int2((*id).xy)).xy; + float2 storeTemp = loaded + (float2)(1.0f); + write_imagef(TargetTexture, as_int2((*id).xy + (uint2)(1u)), (float4)(storeTemp)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(write_only image2d_t TargetTexture) +{ + uint3 id_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); + uint3 param = id_1; + _main(¶m); +} + diff --git a/reference/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 000000000..463f75b99 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,78 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _29 +{ + int _m0[3][3]; +}; + +typedef struct _29 _29; + +struct _7 +{ + int _m0[1]; +}; + +typedef struct _7 _7; + +constant int3 _32 = (int3)(0); +constant int _33[3] = { 0, 0, 0 }; +constant int _34[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +constant int _3 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 0 +#endif +constant int _4 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 0 +#endif +constant int _5 = SPIRV_CROSS_CONSTANT_ID_2; +#define _36 ({ _3, 0, 0 }) +#define _37 ({ _3, _4, 0 }) +#define _38 ({ _3, _4, _5 }) +#define _39 ({ _4, 0, 0 }) +#define _40 ({ _4, _5, 0 }) +#define _41 ({ _4, _5, _3 }) +#define _42 ({ _5, 
0, 0 }) +#define _43 ({ _5, _3, 0 }) +#define _44 ({ _5, _3, _4 }) +#define _45 ({ { _3, _4, _5 }, { 0, 0, 0 }, { 0, 0, 0 } }) +#define _46 ({ { _3, _4, _5 }, { _4, _5, _3 }, { 0, 0, 0 } }) +#define _47 ({ { _3, _4, _5 }, { _4, _5, _3 }, { _5, _3, _4 } }) +#define _48 ((_29){ { { _3, _4, _5 }, { _4, _5, _3 }, { _5, _3, _4 } } }) +#define _49 ((_29){ { { _3, _4, _5 }, { _4, _5, _5 }, { _5, _3, _4 } } }) +#define _50 (_48._m0[0][0]) +#define _51 (_48._m0[1][0]) +#define _52 (_48._m0[0][1]) +#define _53 (_48._m0[2][2]) +#define _54 (_48._m0[2][0]) +#define _55 (_48._m0[1][1]) +#define _56 ((_50 == _51)) +#define _57 ((_52 == _53)) +#define _58 ((_54 == _55)) +#define _59 ((int)(_56)) +#define _60 ((int)(_57)) +#define _61 (_58 ? 2 : 1) +#define _62 ((int3)(_3, 0, 0)) +#define _63 ((int3)(0, _4, 0)) +#define _64 ((int3)(0, 0, _5)) +#define _65 ((int3)(_62.x, 0, _62.z)) +#define _66 ((int3)(0, _63.y, _63.x)) +#define _67 ((int3)(_64.z, 0, _64.z)) +#define _68 ((int3)(_65.y, _65.x, _66.y)) +#define _69 ((int3)(_67.z, _68.y, _68.z)) +#define _70 (_69.x) +#define _71 (_69.y) +#define _72 (_69.z) +#define _73 ((_70 - _71)) +#define _74 ((_73 * _72)) + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _8, __global int* _9) +{ + _9[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/shaders-opencl/asm/comp/quantize.asm.comp b/reference/shaders-opencl/asm/comp/quantize.asm.comp new file mode 100644 index 000000000..3743c7776 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/quantize.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO0 +{ + float scalar; + float2 vec2_val; + float3 vec3_val; + float4 vec4_val; +}; + +typedef struct SSBO0 SSBO0; + +uint spvPackHalf2x16(float2 v) { + uint r; + vstore_half(v.x, 0, (__private half *)&r); 
+ vstore_half(v.y, 1, (__private half *)&r); + return r; +} + +float2 spvUnpackHalf2x16(uint u) { + const __private uint *p = &u; + return (float2)(vload_half(0, (const __private half *)p), + vload_half(1, (const __private half *)p)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO0* _12) +{ + _12->scalar = spvUnpackHalf2x16(spvPackHalf2x16((float2)(_12->scalar, 0.0f))).x; + _12->vec2_val = spvUnpackHalf2x16(spvPackHalf2x16(_12->vec2_val)); + _12->vec3_val = (float3)(spvUnpackHalf2x16(spvPackHalf2x16(_12->vec3_val.xy)), spvUnpackHalf2x16(spvPackHalf2x16((float2)(_12->vec3_val.z, 0.0f))).x); + _12->vec4_val = (float4)(spvUnpackHalf2x16(spvPackHalf2x16(_12->vec4_val.xy)), spvUnpackHalf2x16(spvPackHalf2x16(_12->vec4_val.zw))); +} + diff --git a/reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp b/reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp new file mode 100644 index 000000000..ddae4bb54 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + uint bar; + float3 baz; + uchar quux; + uchar4 blah; + half2 wibble; +}; + +typedef struct foo foo; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* _10) +{ + _10->bar = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x; + _10->baz = convert_float3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))); + _10->blah = convert_uchar4((uint4)(convert_uint4(_10->blah).xyz + ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))), 0u)); + _10->wibble = convert_half2(convert_float2(_10->wibble) * convert_float2(((uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2))).xy)); +} + diff --git a/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp b/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp new file mode 100644 index 
000000000..5bcad0013 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0.0f +#endif +constant float spec_const = SPIRV_CROSS_CONSTANT_ID_0; +constant float4 _20 = (float4)(spec_const); +constant float _26[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; + +struct UBO +{ + float uniform_float; +}; + +typedef struct UBO UBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(UBO ubo) +{ + float4 a = (float4)(0.0f); + float4 b = (float4)(1.0f); + float4 c = _20; + float4 d = (float4)(ubo.uniform_float); + float4 e = d; + float f[8] = {ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float}; +} + diff --git a/reference/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp b/reference/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp new file mode 100644 index 000000000..20235cb7f --- /dev/null +++ b/reference/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a; +}; + +typedef struct SSBO SSBO; + +#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 9u +#endif +constant uint _19 = SPIRV_CROSS_CONSTANT_ID_10; +#ifndef SPIRV_CROSS_CONSTANT_ID_12 +#define SPIRV_CROSS_CONSTANT_ID_12 4u +#endif +constant uint _21 = SPIRV_CROSS_CONSTANT_ID_12; +constant uint3 spvWorkgroupSize = (uint3)(_19, 20u, _21); + +__attribute__((reqd_work_group_size(9, 20, 4))) +__kernel void comp_main(__global float* _6) +{ + _6[0] += 1.0f; +} + diff --git a/reference/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp b/reference/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp new file mode 100644 
index 000000000..bdcb6b78a --- /dev/null +++ b/reference/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp @@ -0,0 +1,22 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct bufA +{ + uint _data[1]; +}; + +typedef struct bufA bufA; + +void _main(__global uint* bufA, __global uint* bufB) +{ + bufA[0] = 0u; + bufB[0] = 0u; +} + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(__global uint* bufA, __global uint* bufB) +{ + _main(bufA, bufB); +} + diff --git a/reference/shaders-opencl/asm/comp/uint_smulextended.asm.comp b/reference/shaders-opencl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 000000000..ab2d4a703 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _4 +{ + uint _m0[1]; +}; + +typedef struct _4 _4; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +typedef struct _20 _20; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _5, __global uint* _6, __global uint* _7, __global uint* _8) +{ + _20 _28; + _28._m0 = _5[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] * _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + _28._m1 = mul_hi(_5[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x], _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]); + _7[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _28._m0; + _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _28._m1; +} + diff --git a/reference/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp b/reference/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 000000000..6a87c437b --- /dev/null +++ b/reference/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,40 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL 
backend) + + +struct _20 +{ + int _m0; + int _m1; +}; + +typedef struct _20 _20; + +struct _5 +{ + int _m0[10]; +}; + +typedef struct _5 _5; + +struct _7 +{ + int _m0[10]; +}; + +typedef struct _7 _7; + +constant int _28 = 0; + +int _39( int* _41, _20* _42) +{ + return (*_41) + _42->_m1; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _6, __global int* _8) +{ + int _32 = _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + _20 _33 = (_20){ _28, 200 }; + _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _39(&_32, &_33); +} + diff --git a/reference/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp b/reference/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 000000000..7ff37b8a2 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,46 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _21 +{ + int _m0; + int _m1; +}; + +typedef struct _21 _21; + +struct _5 +{ + int _m0[10]; +}; + +typedef struct _5 _5; + +struct _7 +{ + int _m0[10]; +}; + +typedef struct _7 _7; + +constant int _29 = 0; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +constant int _9 = SPIRV_CROSS_CONSTANT_ID_0; +constant _21 _30 = (_21){ _9, _29 }; + +int _42( int* _44, _21* _45, _21* _46) +{ + return ((*_44) + _45->_m0) + _46->_m1; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _6, __global int* _8) +{ + int _34 = _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + _21 _35 = _30; + _21 _36 = (_21){ _29, 200 }; + _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _42(&_34, &_35, &_36); +} + diff --git a/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp new file mode 100644 index 
000000000..3bfb4fcbd --- /dev/null +++ b/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp @@ -0,0 +1,68 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + int a[128]; + uint b; + float2 c; +}; + +typedef struct foo foo; + +struct bar +{ + int d; +}; + +typedef struct bar bar; + +__global foo* select_buffer(__global foo* a_1_1, bar cb) +{ + return (cb.d != 0) ? a_1_1 : NULL; +} + +__private uint3* select_input(__private uint3* _3_ptr, __private uint3* _4_ptr, bar cb) +{ + #define _3 (*_3_ptr) + #define _4 (*_4_ptr) + return (cb.d != 0) ? &_3 : &_4; + #undef _3 + #undef _4 +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* buf, bar cb) +{ + uint3 _3 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + uint3 _4 = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))); + __global foo* _44 = select_buffer((__global foo*)buf, cb); + __global foo* _65 = _44; + __private uint3* _45 = select_input(&_3, &_4, cb); + __global foo* _66 = _65; + __global int* _49; + __global int* _52; + _49 = &_66->a[0u]; + _52 = &buf->a[0u]; + int _54; + int _55; + for (;;) + { + _54 = *_49; + _55 = *_52; + if (_54 != _55) + { + int _63 = (_54 + _55) + as_int((*_45).x); + *_49 = _63; + *_52 = _63; + _49 = &_49[1u]; + _52 = &_52[1u]; + continue; + } + else + { + break; + } + } +} + diff --git a/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp new file mode 100644 index 000000000..cbc654c1c --- /dev/null +++ b/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + int a; +}; + +typedef struct foo foo; + +struct bar +{ + int b; +}; + +typedef struct bar bar; + +__global int* _24(__global foo* a_1, __global bar* b_1, __private uint3* _3_ptr) +{ + #define _3 (*_3_ptr) + 
return (_3.x != 0u) ? &a_1->a : &b_1->b; + #undef _3 +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* x, __global int* y) +{ + uint3 _3 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + __global int* _34 = _24((__global foo*)x, (__global bar*)y, &_3); + __global int* _33 = _34; + int _37 = x[0]; + *_33 = 0; + y[0] = _37 + _37; +} + diff --git a/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp new file mode 100644 index 000000000..d4f5be9be --- /dev/null +++ b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb1_struct +{ + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; +}; + +typedef struct cb1_struct cb1_struct; + +int2 get_texcoord( int2* base, int2* index, __private int3* _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID_ptr) +{ + #define _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID (*_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID_ptr) + return ((*base) * as_int3(_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID).xy) + (*index); + #undef _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID +} + +__attribute__((reqd_work_group_size(16, 16, 1))) +__kernel void comp_main(write_only image2d_t u0, cb1_struct cb0_1) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int3 _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID = as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))); + int2 r0 = (int2)((int2)(get_image_width(u0), get_image_height(u0)) >> as_int2((uint2)(4u))); + for (int i = 0; i < r0.y; i++) + { + for (int j = 0; j < r0.x; j++) + { + int2 param = r0; + int2 param_1 = (int2)(i, j); + write_imagef(u0, get_texcoord(¶m, ¶m_1, &_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID), cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx); + } + } +} + diff --git 
a/reference/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp new file mode 100644 index 000000000..22834dd8a --- /dev/null +++ b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb1_struct +{ + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; +}; + +typedef struct cb1_struct cb1_struct; + +__attribute__((reqd_work_group_size(16, 16, 1))) +__kernel void comp_main(write_only image2d_t u0, cb1_struct cb0_1) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int2 r0 = (int2)((int2)(get_image_width(u0), get_image_height(u0)) >> as_int2((uint2)(4u))); + for (int i = 0; i < r0.y; i++) + { + for (int j = 0; j < r0.x; j++) + { + write_imagef(u0, (r0 * as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).xy) + (int2)(i, j), cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx); + } + } +} + diff --git a/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp b/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp new file mode 100644 index 000000000..4aeedb66b --- /dev/null +++ b/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void set_f(int* f_ptr) +{ + #define f (*f_ptr) + f = 40; + #undef f +} + +void set_shared_u(__local int* u_ptr) +{ + #define u (*u_ptr) + u = 50; + #undef u +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local int u; + int f; + set_f(&f); + set_shared_u(&u); + if (((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) == 0u) + { + f = 10; + } + else + { + f = 30; + u = 20; + } +} + diff --git 
a/reference/shaders-opencl/comp/arguments.comp b/reference/shaders-opencl/comp/arguments.comp new file mode 100644 index 000000000..df95f2205 --- /dev/null +++ b/reference/shaders-opencl/comp/arguments.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Buf +{ + uint data[1]; +}; + +typedef struct Buf Buf; + +struct parameter +{ + uint n; +}; + +typedef struct parameter parameter; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global uint* _19, parameter p) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint i = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + _19[i] = i + p.n; +} + diff --git a/reference/shaders-opencl/comp/atomic.comp b/reference/shaders-opencl/comp/atomic.comp new file mode 100644 index 000000000..5c5d824eb --- /dev/null +++ b/reference/shaders-opencl/comp/atomic.comp @@ -0,0 +1,53 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint u32; + int i32; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* ssbo) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local uint shared_u32; + __local int shared_i32; + uint _16 = atomic_add(&(ssbo->u32), 1u); + uint _18 = atomic_or(&(ssbo->u32), 1u); + uint _20 = atomic_xor(&(ssbo->u32), 1u); + uint _22 = atomic_and(&(ssbo->u32), 1u); + uint _24 = atomic_min(&(ssbo->u32), 1u); + uint _26 = atomic_max(&(ssbo->u32), 1u); + uint _28 = atomic_xchg(&(ssbo->u32), 1u); + uint _32 = atomic_cmpxchg(&(ssbo->u32), 10u, 2u); + int _36 = atomic_add(&(ssbo->i32), 1); + int _38 = atomic_or(&(ssbo->i32), 1); + int _40 = atomic_xor(&(ssbo->i32), 1); + int _42 = atomic_and(&(ssbo->i32), 1); + int _44 = atomic_min(&(ssbo->i32), 1); + int _46 = atomic_max(&(ssbo->i32), 1); + int _48 = atomic_xchg(&(ssbo->i32), 1); + int _52 = 
atomic_cmpxchg(&(ssbo->i32), 10, 2); + shared_u32 = 10u; + shared_i32 = 10; + uint _57 = atomic_add(&shared_u32, 1u); + uint _58 = atomic_or(&shared_u32, 1u); + uint _59 = atomic_xor(&shared_u32, 1u); + uint _60 = atomic_and(&shared_u32, 1u); + uint _61 = atomic_min(&shared_u32, 1u); + uint _62 = atomic_max(&shared_u32, 1u); + uint _63 = atomic_xchg(&shared_u32, 1u); + uint _64 = atomic_cmpxchg(&shared_u32, 10u, 2u); + int _65 = atomic_add(&shared_i32, 1); + int _66 = atomic_or(&shared_i32, 1); + int _67 = atomic_xor(&shared_i32, 1); + int _68 = atomic_and(&shared_i32, 1); + int _69 = atomic_min(&shared_i32, 1); + int _70 = atomic_max(&shared_i32, 1); + int _71 = atomic_xchg(&shared_i32, 1); + int _72 = atomic_cmpxchg(&shared_i32, 10, 2); +} + diff --git a/reference/shaders-opencl/comp/barriers.comp b/reference/shaders-opencl/comp/barriers.comp new file mode 100644 index 000000000..1a63caaf6 --- /dev/null +++ b/reference/shaders-opencl/comp/barriers.comp @@ -0,0 +1,80 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void barrier_shared() +{ + mem_fence(CLK_LOCAL_MEM_FENCE); +} + +void full_barrier() +{ + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); +} + +void image_barrier() +{ + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +void buffer_barrier() +{ + mem_fence(CLK_GLOBAL_MEM_FENCE); +} + +void group_barrier() +{ + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); +} + +void barrier_shared_exec() +{ + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + +void full_barrier_exec() +{ + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + +void image_barrier_exec() +{ + mem_fence(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + +void buffer_barrier_exec() +{ + mem_fence(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + +void group_barrier_exec() +{ + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + +void exec_barrier() +{ + 
barrier(CLK_LOCAL_MEM_FENCE); +} + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + barrier_shared(); + full_barrier(); + image_barrier(); + buffer_barrier(); + group_barrier(); + barrier_shared_exec(); + full_barrier_exec(); + image_barrier_exec(); + buffer_barrier_exec(); + group_barrier_exec(); + exec_barrier(); +} + diff --git a/reference/shaders-opencl/comp/basic.comp b/reference/shaders-opencl/comp/basic.comp new file mode 100644 index 000000000..541f0d8e2 --- /dev/null +++ b/reference/shaders-opencl/comp/basic.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint counter; +}; + +typedef struct SSBO3 SSBO3; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _45, __global uint* _48) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float4 idata = _23[ident]; + if (dot(idata, (float4)(1.0f, 5.0f, 6.0f, 2.0f)) > 8.19999980926513671875f) + { + uint _52 = atomic_add(&(_48[0]), 1u); + _45[_52] = idata; + } +} + diff --git a/reference/shaders-opencl/comp/basic.dispatchbase.comp b/reference/shaders-opencl/comp/basic.dispatchbase.comp new file mode 100644 index 000000000..fc994276b --- /dev/null +++ b/reference/shaders-opencl/comp/basic.dispatchbase.comp @@ -0,0 +1,43 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint counter; +}; + +typedef struct SSBO3 SSBO3; + 
+#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 1u +#endif +constant uint _59 = SPIRV_CROSS_CONSTANT_ID_10; +constant uint3 spvWorkgroupSize = (uint3)(_59, 1u, 1u); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _27, __global float4* _49, __global uint* _52) +{ + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + uint workgroup = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x; + float4 idata = _27[ident]; + if (dot(idata, (float4)(1.0f, 5.0f, 6.0f, 2.0f)) > 8.19999980926513671875f) + { + uint _56 = atomic_add(&(_52[0]), 1u); + _49[_56] = idata; + } +} + diff --git a/reference/shaders-opencl/comp/buffer-push-const.comp b/reference/shaders-opencl/comp/buffer-push-const.comp new file mode 100644 index 000000000..df95f2205 --- /dev/null +++ b/reference/shaders-opencl/comp/buffer-push-const.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Buf +{ + uint data[1]; +}; + +typedef struct Buf Buf; + +struct parameter +{ + uint n; +}; + +typedef struct parameter parameter; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global uint* _19, parameter p) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint i = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + _19[i] = i + p.n; +} + diff --git a/reference/shaders-opencl/comp/builtins.comp b/reference/shaders-opencl/comp/builtins.comp new file mode 100644 index 000000000..d0a877bee --- /dev/null +++ b/reference/shaders-opencl/comp/builtins.comp @@ -0,0 +1,15 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(8, 4, 2))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint3 local_id = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))); + 
uint3 global_id = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + uint local_index = ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))); + uint3 work_group_size = spvWorkgroupSize; + uint3 num_work_groups = ((uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2))); + uint3 work_group_id = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); +} + diff --git a/reference/shaders-opencl/comp/cfg-preserve-parameter.comp b/reference/shaders-opencl/comp/cfg-preserve-parameter.comp new file mode 100644 index 000000000..707d5ec40 --- /dev/null +++ b/reference/shaders-opencl/comp/cfg-preserve-parameter.comp @@ -0,0 +1,75 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void out_test_0( int* cond_5, int* i_5) +{ + if ((*cond_5) == 0) + { + *i_5 = 40; + } + else + { + *i_5 = 60; + } +} + +void out_test_1( int* cond_1_1, int* i_1_1) +{ + switch ((*cond_1_1)) + { + case 40: + { + *i_1_1 = 40; + break; + } + default: + { + *i_1_1 = 70; + break; + } + } +} + +void inout_test_0( int* cond_2_1, int* i_2_1) +{ + if ((*cond_2_1) == 0) + { + *i_2_1 = 40; + } +} + +void inout_test_1( int* cond_3_1, int* i_3_1) +{ + switch ((*cond_3_1)) + { + case 40: + { + *i_3_1 = 40; + break; + } + } +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ + int cond_4_1 = 40; + int i_4_1 = 50; + int param_8 = cond_4_1; + int param_1_1 = i_4_1; + out_test_0(&param_8, &param_1_1); + i_4_1 = param_1_1; + int param_2_1 = cond_4_1; + int param_3_1 = i_4_1; + out_test_1(&param_2_1, &param_3_1); + i_4_1 = param_3_1; + int param_4_1 = cond_4_1; + int param_5_1 = i_4_1; + inout_test_0(&param_4_1, &param_5_1); + i_4_1 = param_5_1; + int param_6_1 = cond_4_1; + int param_7_1 = i_4_1; + inout_test_1(&param_6_1, &param_7_1); + i_4_1 = param_7_1; +} + diff --git a/reference/shaders-opencl/comp/complex-type-alias.comp b/reference/shaders-opencl/comp/complex-type-alias.comp new file mode 100644
index 000000000..137313959 --- /dev/null +++ b/reference/shaders-opencl/comp/complex-type-alias.comp @@ -0,0 +1,56 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Foo0 +{ + float a; +}; + +typedef struct Foo0 Foo0; + +struct Foo1 +{ + Foo0 a; +}; + +typedef struct Foo1 Foo1; + +struct Foo2 +{ + Foo1 a; + float weight; +}; + +typedef struct Foo2 Foo2; + +struct SSBO +{ + Foo2 outputs[1]; +}; + +typedef struct SSBO SSBO; + +void Zero( Foo0* v_1) +{ + v_1->a = 0.0f; +} + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(__global Foo2* _53) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local Foo2 coeffs[64]; + Foo2 data_1; + data_1.weight = 0.0f; + Foo0 param_1; + Zero(&param_1); + data_1.a.a = param_1; + coeffs[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = data_1; + barrier(CLK_LOCAL_MEM_FENCE); + if (((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) == 0u) + { + _53[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x].a.a.a = coeffs[0].a.a.a; + _53[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x].weight = coeffs[0].weight; + } +} + diff --git a/reference/shaders-opencl/comp/composite-construct.comp b/reference/shaders-opencl/comp/composite-construct.comp new file mode 100644 index 000000000..995603ba4 --- /dev/null +++ b/reference/shaders-opencl/comp/composite-construct.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO0 +{ + float4 as[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + float4 bs[1]; +}; + +typedef struct SSBO1 SSBO1; + +constant float4 _43[2] = { (float4)(20.0f), (float4)(40.0f) }; + +struct Composite +{ + float4 a; + float4 b; +}; + +typedef struct Composite Composite; + +__attribute__((reqd_work_group_size(1, 1, 1)))
+__kernel void comp_main(__global float4* _16, __global float4* _32) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 values[2] = { _16[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x], _32[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] }; + Composite c = (Composite){ values[0], _43[1] }; + _16[0] = values[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; + _32[1] = c.b; +} + diff --git a/reference/shaders-opencl/comp/culling.comp b/reference/shaders-opencl/comp/culling.comp new file mode 100644 index 000000000..c6286177f --- /dev/null +++ b/reference/shaders-opencl/comp/culling.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint count; +}; + +typedef struct SSBO3 SSBO3; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _38, __global uint* _41) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float idata = _22[ident]; + if (idata > 12.0f) + { + uint _45 = atomic_add(&(_41[0]), 1u); + _38[_45] = idata; + } +} + diff --git a/reference/shaders-opencl/comp/defer-parens.comp b/reference/shaders-opencl/comp/defer-parens.comp new file mode 100644 index 000000000..90d8ebb41 --- /dev/null +++ b/reference/shaders-opencl/comp/defer-parens.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 data; + int index; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _13) +{ + uint3 spvWorkgroupSize 
= (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 d = _13->data; + _13->data = (float4)(d.x, d.yz + (float2)(10.0f), d.w); + _13->data = (d + d) + d; + _13->data = (d.yz + (float2)(10.0f)).xxyy; + float t = (d.yz + (float2)(10.0f)).y; + _13->data = (float4)(t); + t = (d.zw + (float2)(10.0f))[_13->index]; + _13->data = (float4)(t); +} + diff --git a/reference/shaders-opencl/comp/dowhile.comp b/reference/shaders-opencl/comp/dowhile.comp new file mode 100644 index 000000000..2dca8bcda --- /dev/null +++ b/reference/shaders-opencl/comp/dowhile.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 mvp; + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const SSBO* _28, __global float4* _52) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int i; + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + i = 0; + float4 idat = _28->in_data[ident]; + do + { + idat = _28->mvp * idat; + i++; + } while (i < 16); + _52[ident] = idat; +} + diff --git a/reference/shaders-opencl/comp/expect-assume.comp b/reference/shaders-opencl/comp/expect-assume.comp new file mode 100644 index 000000000..a9415be79 --- /dev/null +++ b/reference/shaders-opencl/comp/expect-assume.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct buffer_t +{ + uint z; +}; + +typedef struct buffer_t buffer_t; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(__global uint* buf) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + buf[0] = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).z; +} + diff --git a/reference/shaders-opencl/comp/force-recompile-hooks.swizzle.comp 
b/reference/shaders-opencl/comp/force-recompile-hooks.swizzle.comp new file mode 100644 index 000000000..c36ee1b4c --- /dev/null +++ b/reference/shaders-opencl/comp/force-recompile-hooks.swizzle.comp @@ -0,0 +1,12 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +const sampler_t spvDefaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(read_only image2d_t foo, write_only image2d_t bar) +{ + float4 a_1 = read_imagef(foo, spvDefaultSampler, (float2)(1.0f)); + write_imagef(bar, (int2)(0), a_1); +} + diff --git a/reference/shaders-opencl/comp/functions.comp b/reference/shaders-opencl/comp/functions.comp new file mode 100644 index 000000000..490fc907e --- /dev/null +++ b/reference/shaders-opencl/comp/functions.comp @@ -0,0 +1,15 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void myfunc(__local int* foo) +{ + foo[0] = 13; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ + __local int foo[1337]; + myfunc(foo); +} + diff --git a/reference/shaders-opencl/comp/global-invocation-id.comp b/reference/shaders-opencl/comp/global-invocation-id.comp new file mode 100644 index 000000000..84693b0ee --- /dev/null +++ b/reference/shaders-opencl/comp/global-invocation-id.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] += 0.0199999995529651641845703125f; +} + diff --git a/reference/shaders-opencl/comp/image.comp b/reference/shaders-opencl/comp/image.comp new file mode 100644 index 000000000..4eca27cde --- /dev/null +++ b/reference/shaders-opencl/comp/image.comp @@ -0,0 
+1,11 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(read_only image2d_t uImageIn, write_only image2d_t uImageOut) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 v = read_imagef(uImageIn, as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy) + (int2)(get_image_width(uImageIn), get_image_height(uImageIn))); + write_imagef(uImageOut, as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy), v); +} + diff --git a/reference/shaders-opencl/comp/insert.comp b/reference/shaders-opencl/comp/insert.comp new file mode 100644 index 000000000..64d372262 --- /dev/null +++ b/reference/shaders-opencl/comp/insert.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 out_data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _27) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 v; + v.x = 10.0f; + v.y = 30.0f; + v.z = 70.0f; + v.w = 90.0f; + _27[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = v; + _27[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x][1u] = 20.0f; +} + diff --git a/reference/shaders-opencl/comp/local-invocation-id.comp b/reference/shaders-opencl/comp/local-invocation-id.comp new file mode 100644 index 000000000..0def2374c --- /dev/null +++ b/reference/shaders-opencl/comp/local-invocation-id.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint3)(get_local_id(0), get_local_id(1), 
get_local_id(2))).x] += 0.0199999995529651641845703125f; +} + diff --git a/reference/shaders-opencl/comp/local-invocation-index.comp b/reference/shaders-opencl/comp/local-invocation-index.comp new file mode 100644 index 000000000..0a1a8ed3c --- /dev/null +++ b/reference/shaders-opencl/comp/local-invocation-index.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += 0.0199999995529651641845703125f; +} + diff --git a/reference/shaders-opencl/comp/local-size-duplicate-spec-id.comp b/reference/shaders-opencl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..5f462293d --- /dev/null +++ b/reference/shaders-opencl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,42 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct StorageBuffer +{ + uint values[1]; +}; + +typedef struct StorageBuffer StorageBuffer; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1 +#endif +constant int local_size_x_val = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +constant int local_size_y_val = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1 +#endif +constant int local_size_z_val = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _22 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1u +#endif +constant uint _23 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define 
SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _24 = SPIRV_CROSS_CONSTANT_ID_2; +constant uint3 spvWorkgroupSize = (uint3)(_22, _23, _24); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* ssbo) +{ + ssbo[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = 1u; +} + diff --git a/reference/shaders-opencl/comp/mod.comp b/reference/shaders-opencl/comp/mod.comp new file mode 100644 index 000000000..ad1ead0ed --- /dev/null +++ b/reference/shaders-opencl/comp/mod.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _33) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float4 v = fmod(_23[ident], _33[ident]); + _33[ident] = v; + uint4 vu = as_uint4(_23[ident]) % as_uint4(_33[ident]); + _33[ident] = as_float4(vu); + int4 vi = as_int4(_23[ident]) % as_int4(_33[ident]); + _33[ident] = as_float4(vi); +} + diff --git a/reference/shaders-opencl/comp/modf.comp b/reference/shaders-opencl/comp/modf.comp new file mode 100644 index 000000000..e890b4341 --- /dev/null +++ b/reference/shaders-opencl/comp/modf.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct ResType +{ + float4 _m0; + float4 _m1; +}; + +typedef struct ResType ResType; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _38) +{ + 
uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + ResType _32; + _32._m0 = modf(_23[ident], &_32._m1); + float4 i = _32._m1; + float4 v = _32._m0; + _38[ident] = v; +} + diff --git a/reference/shaders-opencl/comp/outer-product.comp b/reference/shaders-opencl/comp/outer-product.comp new file mode 100644 index 000000000..8441e6d2d --- /dev/null +++ b/reference/shaders-opencl/comp/outer-product.comp @@ -0,0 +1,42 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float2 m22; + float3 m23; + float4 m24; + float2 m32; + float3 m33; + float4 m34; + float2 m42; + float3 m43; + float4 m44; +}; + +typedef struct SSBO SSBO; + +struct ReadSSBO +{ + float2 v2; + float3 v3; + float4 v4; +}; + +typedef struct ReadSSBO ReadSSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _21, __global const ReadSSBO* _26) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _21->m22 = _26->v2 * _26->v2.x; + _21->m23 = _26->v3 * _26->v2.x; + _21->m24 = _26->v4 * _26->v2.x; + _21->m32 = _26->v2 * _26->v3.x; + _21->m33 = _26->v3 * _26->v3.x; + _21->m34 = _26->v4 * _26->v3.x; + _21->m42 = _26->v2 * _26->v4.x; + _21->m43 = _26->v3 * _26->v4.x; + _21->m44 = _26->v4 * _26->v4.x; +} + diff --git a/reference/shaders-opencl/comp/packing-test-1.comp b/reference/shaders-opencl/comp/packing-test-1.comp new file mode 100644 index 000000000..0afe8d4eb --- /dev/null +++ b/reference/shaders-opencl/comp/packing-test-1.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct T1 +{ + float3 a; + float b; +}; + +typedef struct T1 T1; + +struct Buffer0 +{ + T1 buf0[1]; +}; + +typedef struct Buffer0 Buffer0; + +struct Buffer1 +{ + float buf1[1]; +}; + +typedef struct Buffer1 Buffer1; + +__attribute__((reqd_work_group_size(32, 1, 1))) 
+__kernel void comp_main(__global T1* _15, __global float* _34) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + T1 v; + v.a = _15[0].a; + v.b = _15[0].b; + float x = v.b; + _34[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = x; +} + diff --git a/reference/shaders-opencl/comp/packing-test-2.comp b/reference/shaders-opencl/comp/packing-test-2.comp new file mode 100644 index 000000000..ddf27da61 --- /dev/null +++ b/reference/shaders-opencl/comp/packing-test-2.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct T1 +{ + float3 a; + float b; +}; + +typedef struct T1 T1; + +struct Buffer0 +{ + T1 buf0[1]; +}; + +typedef struct Buffer0 Buffer0; + +struct Buffer1 +{ + float buf1[1]; +}; + +typedef struct Buffer1 Buffer1; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(__global T1* _14, __global float* _24) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float x = _14[0].b; + _24[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = x; +} + diff --git a/reference/shaders-opencl/comp/read-write-only.comp b/reference/shaders-opencl/comp/read-write-only.comp new file mode 100644 index 000000000..6b54b862b --- /dev/null +++ b/reference/shaders-opencl/comp/read-write-only.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + float4 data4; + float4 data5; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO0 +{ + float4 data0; + float4 data1; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + float4 data2; + float4 data3; +}; + +typedef struct SSBO1 SSBO1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO2* _10, __global const SSBO0* _15, __global SSBO1* _21) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _10->data4 = _15->data0 + _21->data2; 
+ _10->data5 = _15->data1 + _21->data3; +} + diff --git a/reference/shaders-opencl/comp/rmw-opt.comp b/reference/shaders-opencl/comp/rmw-opt.comp new file mode 100644 index 000000000..f205a3654 --- /dev/null +++ b/reference/shaders-opencl/comp/rmw-opt.comp @@ -0,0 +1,31 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int a; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9[0] += 10; + _9[0] -= 10; + _9[0] *= 10; + _9[0] /= 10; + _9[0] = _9[0] << 2; + _9[0] = _9[0] >> 3; + _9[0] &= 40; + _9[0] ^= 10; + _9[0] %= 40; + _9[0] |= 1; + bool c = false; + bool d = true; + c = c && d; + d = d || c; + _9[0] = (int)(c && d); +} + diff --git a/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 000000000..59f3fb7ed --- /dev/null +++ b/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a; + float b; + float c; + float d; + float e; + float f; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9->c = distance(_9->a, _9->b); + _9->d = length(_9->a); + _9->e = normalize(_9->a); + _9->f = distance(_9->a - 1.0f, _9->b - 2.0f); +} + diff --git a/reference/shaders-opencl/comp/shared-std450.double.comp b/reference/shaders-opencl/comp/shared-std450.double.comp new file mode 100644 index 000000000..84cb8354c --- /dev/null +++ b/reference/shaders-opencl/comp/shared-std450.double.comp @@ -0,0 +1,31 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + 
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +struct SSBO +{ + double in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + double out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const double* _22, __global double* _44) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local double sShared[4]; + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + double idata = _22[ident]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = idata; + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _44[ident] = sShared[(4u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))) - 1u]; +} + diff --git a/reference/shaders-opencl/comp/shared-struct-bool-cast.comp b/reference/shaders-opencl/comp/shared-struct-bool-cast.comp new file mode 100644 index 000000000..870672c31 --- /dev/null +++ b/reference/shaders-opencl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,106 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct S1 +{ + int3 a; + uint2 b; + int4 c; + uint d; +}; + +typedef struct S1 S1; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +bool compare_ivec3( int3* a, int3* b) +{ + return all((*a) == (*b)); +} + +bool compare_uvec2( uint2* a_1, uint2* b_1) +{ + return all((*a_1) == (*b_1)); +} + +bool compare_bvec4( int4* a_2, int4* b_2) +{ + return all((*a_2) == (*b_2)); +} + +bool compare_uint( uint* a_3, uint* b_3) +{ + return (*a_3) == (*b_3); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _132) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a = (int3)(6, 8, 8); + s1.b = (uint2)(4u); + 
s1.c = (int4)(false, false, false, true); + s1.d = 6u; + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool allOk = true; + bool _99; + if (allOk) + { + int3 param = (int3)(6, 8, 8); + int3 param_1 = s1.a; + _99 = compare_ivec3(&param, &param_1); + } + else + { + _99 = allOk; + } + allOk = _99; + bool _108; + if (allOk) + { + uint2 param_2 = (uint2)(4u); + uint2 param_3 = s1.b; + _108 = compare_uvec2(&param_2, &param_3); + } + else + { + _108 = allOk; + } + allOk = _108; + bool _117; + if (allOk) + { + int4 param_4 = (int4)(false, false, false, true); + int4 param_5 = s1.c; + _117 = compare_bvec4(&param_4, &param_5); + } + else + { + _117 = allOk; + } + allOk = _117; + bool _126; + if (allOk) + { + uint param_6 = 6u; + uint param_7 = s1.d; + _126 = compare_uint(&param_6, &param_7); + } + else + { + _126 = allOk; + } + allOk = _126; + if (allOk) + { + _132[0] += as_uint(1); + } +} + diff --git a/reference/shaders-opencl/comp/shared-zero-init-simple.comp b/reference/shaders-opencl/comp/shared-zero-init-simple.comp new file mode 100644 index 000000000..62136f145 --- /dev/null +++ b/reference/shaders-opencl/comp/shared-zero-init-simple.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _32) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared; + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float idata = _22[ident]; + _32[ident] = sShared + idata; +} + diff --git a/reference/shaders-opencl/comp/shared-zero-init.comp b/reference/shaders-opencl/comp/shared-zero-init.comp new file mode 100644 index 000000000..ec05e3c9a --- /dev/null +++
b/reference/shaders-opencl/comp/shared-zero-init.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +constant float _31[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _48) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared[4]; + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float idata = _22[ident]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += idata; + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _48[ident] = sShared[(4u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))) - 1u]; +} + diff --git a/reference/shaders-opencl/comp/shared.comp b/reference/shaders-opencl/comp/shared.comp new file mode 100644 index 000000000..a1b217b6f --- /dev/null +++ b/reference/shaders-opencl/comp/shared.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _44) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared[4]; + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float idata = _22[ident]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = idata; + 
mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _44[ident] = sShared[(4u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))) - 1u]; +} + diff --git a/reference/shaders-opencl/comp/spec-constant-work-group-size.comp b/reference/shaders-opencl/comp/spec-constant-work-group-size.comp new file mode 100644 index 000000000..4bf86f53f --- /dev/null +++ b/reference/shaders-opencl/comp/spec-constant-work-group-size.comp @@ -0,0 +1,39 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 2 +#endif +constant int b = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1 +#endif +constant int a = SPIRV_CROSS_CONSTANT_ID_0; + +struct SSBO +{ + int v[1]; +}; + +typedef struct SSBO SSBO; + +#define _21 ((as_uint(a) + 0u)) +#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 1u +#endif +constant uint _22 = SPIRV_CROSS_CONSTANT_ID_10; +constant uint3 spvWorkgroupSize = (uint3)(_22, 20u, 1u); +#define _27 (spvWorkgroupSize.x) +#define _28 ((_21 + _27)) +#define _29 (spvWorkgroupSize.y) +#define _30 ((_28 + _29)) +#define _32 ((1 - a)) + +__attribute__((reqd_work_group_size(1, 20, 1))) +__kernel void comp_main(__global int* _17) +{ + int spec_const_array_size[b]; + spec_const_array_size[a] = a; + _17[_30] = b + spec_const_array_size[_32]; +} + diff --git a/reference/shaders-opencl/comp/struct-layout.comp b/reference/shaders-opencl/comp/struct-layout.comp new file mode 100644 index 000000000..eb416ee27 --- /dev/null +++ b/reference/shaders-opencl/comp/struct-layout.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Foo +{ + float4 m; +}; + +typedef struct Foo Foo; + +struct SSBO2 +{ + Foo out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + Foo in_data[1]; +}; + +typedef struct SSBO SSBO; + 
+__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Foo* _23, __global const Foo* _30) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + _23[ident].m = _30[ident].m * _30[ident].m; +} + diff --git a/reference/shaders-opencl/comp/struct-nested.comp b/reference/shaders-opencl/comp/struct-nested.comp new file mode 100644 index 000000000..3aeed9be7 --- /dev/null +++ b/reference/shaders-opencl/comp/struct-nested.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct s1 +{ + int a; +}; + +typedef struct s1 s1; + +struct s2 +{ + s1 b; +}; + +typedef struct s2 s2; + +struct dstbuffer +{ + s2 test[1]; +}; + +typedef struct dstbuffer dstbuffer; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global s2* _19) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + s2 testVal; + testVal.b.a = 0; + _19[0].b.a = testVal.b.a; +} + diff --git a/reference/shaders-opencl/comp/struct-packing.invalid.comp b/reference/shaders-opencl/comp/struct-packing.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl/comp/torture-loop.comp b/reference/shaders-opencl/comp/torture-loop.comp new file mode 100644 index 000000000..45f32a55b --- /dev/null +++ b/reference/shaders-opencl/comp/torture-loop.comp @@ -0,0 +1,55 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 mvp; + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const SSBO* _24, __global float4* _89) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident_1 = 
 ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float4 idat_1 = _24->in_data[ident_1]; + int k_1 = 0; + for (;;) + { + int _39 = k_1; + int _40 = _39 + 1; + k_1 = _40; + if (_40 < 10) + { + idat_1 *= 2.0f; + k_1++; + continue; + } + else + { + break; + } + } + for (uint i_1 = 0u; i_1 < 16u; i_1 += as_uint(1), k_1++) + { + for (uint j_1 = 0u; j_1 < 30u; j_1 += as_uint(1)) + { + idat_1 = _24->mvp * idat_1; + } + } + do + { + k_1++; + } while (k_1 > 10); + _89[ident_1] = idat_1; +} + diff --git a/reference/shaders-opencl/comp/type-alias.comp b/reference/shaders-opencl/comp/type-alias.comp new file mode 100644 index 000000000..0a195bf2b --- /dev/null +++ b/reference/shaders-opencl/comp/type-alias.comp @@ -0,0 +1,61 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct S0 +{ + float4 a; +}; + +typedef struct S0 S0; + +struct S1 +{ + float4 a; +}; + +typedef struct S1 S1; + +struct SSBO0 +{ + S0 s0s[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + S1 s1s[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO2 +{ + float4 outputs[1]; +}; + +typedef struct SSBO2 SSBO2; + +float4 overload( S0* s0) +{ + return s0->a; +} + +float4 overload_1( S1* s1) +{ + return s1->a; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global S0* _36, __global S1* _55, __global float4* _66) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + S0 s0_1; + s0_1.a = _36[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + S1 s1_1; + s1_1.a = _55[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + S0 param = s0_1; + S1 param_1 = s1_1; + _66[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = overload(&param) + overload_1(&param_1); +} + diff --git a/reference/shaders-opencl/comp/udiv.comp b/reference/shaders-opencl/comp/udiv.comp new file mode 100644 index 000000000..7e336b9b4 --- /dev/null +++
b/reference/shaders-opencl/comp/udiv.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + uint outputs[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + uint inputs[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _10, __global uint* _23) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _10[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] / 29u; +} + diff --git a/reference/shaders-opencl/comp/writable-ssbo.comp b/reference/shaders-opencl/comp/writable-ssbo.comp new file mode 100644 index 000000000..30716e427 --- /dev/null +++ b/reference/shaders-opencl/comp/writable-ssbo.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b += 0.0199999995529651641845703125f; +} + diff --git a/shaders-opencl/asm/comp/atomic-decrement.asm.comp b/shaders-opencl/asm/comp/atomic-decrement.asm.comp new file mode 100644 index 000000000..a87b93188 --- /dev/null +++ b/shaders-opencl/asm/comp/atomic-decrement.asm.comp @@ -0,0 +1,71 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %3 "main" %15 + OpExecutionMode %3 LocalSize 4 1 1 + OpName %3 "main" + OpName %8 "u0" + OpName %9 "u0_counters" + OpMemberName %9 0 "c" + OpName %11 "u0_counter" + OpName %15 "vThreadID" + OpName %19 "r0" + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpMemberDecorate %9 0 Offset 
0 + OpDecorate %9 BufferBlock + OpDecorate %11 DescriptorSet 1 + OpDecorate %11 Binding 0 + OpDecorate %15 BuiltIn GlobalInvocationId + %1 = OpTypeVoid + %2 = OpTypeFunction %1 + %5 = OpTypeInt 32 0 + %6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui + %7 = OpTypePointer UniformConstant %6 + %8 = OpVariable %7 UniformConstant + %9 = OpTypeStruct %5 + %10 = OpTypePointer Uniform %9 + %11 = OpVariable %10 Uniform + %12 = OpTypeInt 32 1 + %13 = OpTypeVector %12 3 + %14 = OpTypePointer Input %13 + %15 = OpVariable %14 Input + %16 = OpTypeFloat 32 + %17 = OpTypeVector %16 4 + %18 = OpTypePointer Function %17 + %20 = OpTypePointer Uniform %5 + %21 = OpConstant %5 0 + %23 = OpConstant %5 1 + %26 = OpTypePointer Function %16 + %33 = OpConstant %12 0 + %34 = OpConstant %5 2 + %37 = OpTypePointer Input %12 + %41 = OpTypeVector %5 4 + %3 = OpFunction %1 None %2 + %4 = OpLabel + %19 = OpVariable %18 Function + %22 = OpAccessChain %20 %11 %21 + %24 = OpAtomicIDecrement %5 %22 %23 %21 + %25 = OpBitcast %16 %24 + %27 = OpInBoundsAccessChain %26 %19 %21 + OpStore %27 %25 + %28 = OpLoad %6 %8 + %29 = OpInBoundsAccessChain %26 %19 %21 + %30 = OpLoad %16 %29 + %31 = OpBitcast %12 %30 + %32 = OpIMul %5 %31 %23 + %35 = OpShiftRightLogical %5 %33 %34 + %36 = OpIAdd %5 %32 %35 + %38 = OpInBoundsAccessChain %37 %15 %21 + %39 = OpLoad %12 %38 + %40 = OpBitcast %5 %39 + %42 = OpCompositeConstruct %41 %40 %40 %40 %40 + OpImageWrite %28 %36 %42 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/atomic-increment.asm.comp b/shaders-opencl/asm/comp/atomic-increment.asm.comp new file mode 100644 index 000000000..3acb7115f --- /dev/null +++ b/shaders-opencl/asm/comp/atomic-increment.asm.comp @@ -0,0 +1,71 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 0 +; Bound: 43 +; Schema: 0 + OpCapability Shader + OpCapability SampledBuffer + OpCapability ImageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %3 "main" %15 + OpExecutionMode %3 LocalSize 4 1 1 + OpName 
%3 "main" + OpName %8 "u0" + OpName %9 "u0_counters" + OpMemberName %9 0 "c" + OpName %11 "u0_counter" + OpName %15 "vThreadID" + OpName %19 "r0" + OpDecorate %8 DescriptorSet 0 + OpDecorate %8 Binding 0 + OpMemberDecorate %9 0 Offset 0 + OpDecorate %9 BufferBlock + OpDecorate %11 DescriptorSet 1 + OpDecorate %11 Binding 0 + OpDecorate %15 BuiltIn GlobalInvocationId + %1 = OpTypeVoid + %2 = OpTypeFunction %1 + %5 = OpTypeInt 32 0 + %6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui + %7 = OpTypePointer UniformConstant %6 + %8 = OpVariable %7 UniformConstant + %9 = OpTypeStruct %5 + %10 = OpTypePointer Uniform %9 + %11 = OpVariable %10 Uniform + %12 = OpTypeInt 32 1 + %13 = OpTypeVector %12 3 + %14 = OpTypePointer Input %13 + %15 = OpVariable %14 Input + %16 = OpTypeFloat 32 + %17 = OpTypeVector %16 4 + %18 = OpTypePointer Function %17 + %20 = OpTypePointer Uniform %5 + %21 = OpConstant %5 0 + %23 = OpConstant %5 1 + %26 = OpTypePointer Function %16 + %33 = OpConstant %12 0 + %34 = OpConstant %5 2 + %37 = OpTypePointer Input %12 + %41 = OpTypeVector %5 4 + %3 = OpFunction %1 None %2 + %4 = OpLabel + %19 = OpVariable %18 Function + %22 = OpAccessChain %20 %11 %21 + %24 = OpAtomicIIncrement %5 %22 %23 %21 + %25 = OpBitcast %16 %24 + %27 = OpInBoundsAccessChain %26 %19 %21 + OpStore %27 %25 + %28 = OpLoad %6 %8 + %29 = OpInBoundsAccessChain %26 %19 %21 + %30 = OpLoad %16 %29 + %31 = OpBitcast %12 %30 + %32 = OpIMul %5 %31 %23 + %35 = OpShiftRightLogical %5 %33 %34 + %36 = OpIAdd %5 %32 %35 + %38 = OpInBoundsAccessChain %37 %15 %21 + %39 = OpLoad %12 %38 + %40 = OpBitcast %5 %39 + %42 = OpCompositeConstruct %41 %40 %40 %40 %40 + OpImageWrite %28 %36 %42 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/bitcast_iadd.asm.comp b/shaders-opencl/asm/comp/bitcast_iadd.asm.comp new file mode 100644 index 000000000..3b31ab285 --- /dev/null +++ b/shaders-opencl/asm/comp/bitcast_iadd.asm.comp @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference 
Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of IAdd + %result_iadd_0 = OpIAdd %uvec4 %input0 %input1 + %result_iadd_1 = OpIAdd %uvec4 %input1 %input0 + %result_iadd_2 = OpIAdd %uvec4 %input0 %input0 + %result_iadd_3 = 
OpIAdd %uvec4 %input1 %input1 + %result_iadd_4 = OpIAdd %ivec4 %input0 %input0 + %result_iadd_5 = OpIAdd %ivec4 %input1 %input1 + %result_iadd_6 = OpIAdd %ivec4 %input0 %input1 + %result_iadd_7 = OpIAdd %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/bitcast_icmp.asm.comp b/shaders-opencl/asm/comp/bitcast_icmp.asm.comp new file mode 100644 index 000000000..b7b4e0b2e --- /dev/null +++ b/shaders-opencl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,101 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %inputs Restrict + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + OpDecorate %outputs Restrict + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %bool = OpTypeBool + %bvec4 = OpTypeVector %bool 4 + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = 
OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + %uzero = OpConstant %uint 0 + %uone = OpConstant %uint 1 + %utrue = OpConstantComposite %uvec4 %uone %uone %uone %uone + %ufalse = OpConstantComposite %uvec4 %uzero %uzero %uzero %uzero + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + + %result_slt = OpSLessThan %bvec4 %input0 %input1 + %result_sle = OpSLessThanEqual %bvec4 %input0 %input1 + %result_ult = OpULessThan %bvec4 %input0 %input1 + %result_ule = OpULessThanEqual %bvec4 %input0 %input1 + %result_sgt = OpSGreaterThan %bvec4 %input0 %input1 + %result_sge = OpSGreaterThanEqual %bvec4 %input0 %input1 + %result_ugt = OpUGreaterThan %bvec4 %input0 %input1 + %result_uge = OpUGreaterThanEqual %bvec4 %input0 %input1 + + %int_slt = OpSelect %uvec4 %result_slt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_slt + + %int_sle = OpSelect %uvec4 %result_sle %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sle + + %int_ult = OpSelect %uvec4 %result_ult %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ult + + %int_ule = OpSelect %uvec4 %result_ule %utrue %ufalse + OpStore %output_ptr_uvec4 %int_ule + + %int_sgt = OpSelect %uvec4 %result_sgt %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sgt + + %int_sge = OpSelect %uvec4 %result_sge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_sge + + %int_ugt = OpSelect %uvec4 %result_ugt %utrue %ufalse + OpStore %output_ptr_uvec4 
%int_ugt + + %int_uge = OpSelect %uvec4 %result_uge %utrue %ufalse + OpStore %output_ptr_uvec4 %int_uge + + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/bitcast_sar.asm.comp b/shaders-opencl/asm/comp/bitcast_sar.asm.comp new file mode 100644 index 000000000..64f19fc34 --- /dev/null +++ b/shaders-opencl/asm/comp/bitcast_sar.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad 
%ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of ShiftRightArithmetic + %result_iadd_0 = OpShiftRightArithmetic %uvec4 %input0 %input1 + %result_iadd_1 = OpShiftRightArithmetic %uvec4 %input1 %input0 + %result_iadd_2 = OpShiftRightArithmetic %uvec4 %input0 %input0 + %result_iadd_3 = OpShiftRightArithmetic %uvec4 %input1 %input1 + %result_iadd_4 = OpShiftRightArithmetic %ivec4 %input0 %input0 + %result_iadd_5 = OpShiftRightArithmetic %ivec4 %input1 %input1 + %result_iadd_6 = OpShiftRightArithmetic %ivec4 %input0 %input1 + %result_iadd_7 = OpShiftRightArithmetic %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp b/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp new file mode 100644 index 000000000..ab73ec83d --- /dev/null +++ b/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate 
%inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of SDiv + %result_iadd_0 = OpSDiv %uvec4 %input0 %input1 + %result_iadd_1 = OpSDiv %uvec4 %input1 %input0 + %result_iadd_2 = OpSDiv %uvec4 %input0 %input0 + %result_iadd_3 = OpSDiv %uvec4 %input1 %input1 + %result_iadd_4 = OpSDiv %ivec4 %input0 %input0 + %result_iadd_5 = OpSDiv %ivec4 %input1 %input1 + %result_iadd_6 = OpSDiv %ivec4 %input0 %input1 + %result_iadd_7 = OpSDiv %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl/asm/comp/bitcast_slr.asm.comp b/shaders-opencl/asm/comp/bitcast_slr.asm.comp new file mode 100644 index 000000000..6741f5cb5 --- /dev/null +++ b/shaders-opencl/asm/comp/bitcast_slr.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %func "main" + OpExecutionMode %func LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_GOOGLE_cpp_style_line_directive" + OpSourceExtension "GL_GOOGLE_include_directive" + OpMemberDecorate %input_struct 0 Offset 0 + OpMemberDecorate %input_struct 1 Offset 16 + OpMemberDecorate %output_struct 0 Offset 0 + OpMemberDecorate %output_struct 1 Offset 16 + OpDecorate %input_struct BufferBlock + OpDecorate %inputs DescriptorSet 0 + OpDecorate %inputs Binding 0 + OpDecorate %output_struct BufferBlock + OpDecorate %outputs DescriptorSet 0 + OpDecorate %outputs Binding 1 + + %void = OpTypeVoid + %main_func = OpTypeFunction %void + + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + + %ivec4_ptr = OpTypePointer Uniform %ivec4 + %uvec4_ptr = OpTypePointer Uniform %uvec4 + + %zero = OpConstant %int 0 + %one = OpConstant %int 1 + + %input_struct = OpTypeStruct %ivec4 %uvec4 + %input_struct_ptr = OpTypePointer Uniform %input_struct + %inputs = OpVariable %input_struct_ptr Uniform + %output_struct = OpTypeStruct %uvec4 %ivec4 + %output_struct_ptr = OpTypePointer Uniform %output_struct + %outputs = OpVariable %output_struct_ptr Uniform + + %func = OpFunction %void None %main_func + %block = OpLabel + + %input1_ptr = OpAccessChain %ivec4_ptr %inputs %zero + %input0_ptr = OpAccessChain %uvec4_ptr %inputs %one + %input1 = OpLoad %ivec4 %input1_ptr + %input0 = OpLoad %uvec4 %input0_ptr + + %output_ptr_uvec4 = OpAccessChain %uvec4_ptr %outputs %zero + %output_ptr_ivec4 = 
OpAccessChain %ivec4_ptr %outputs %one + +; Test all variants of ShiftRightLogical + %result_iadd_0 = OpShiftRightLogical %uvec4 %input0 %input1 + %result_iadd_1 = OpShiftRightLogical %uvec4 %input1 %input0 + %result_iadd_2 = OpShiftRightLogical %uvec4 %input0 %input0 + %result_iadd_3 = OpShiftRightLogical %uvec4 %input1 %input1 + %result_iadd_4 = OpShiftRightLogical %ivec4 %input0 %input0 + %result_iadd_5 = OpShiftRightLogical %ivec4 %input1 %input1 + %result_iadd_6 = OpShiftRightLogical %ivec4 %input0 %input1 + %result_iadd_7 = OpShiftRightLogical %ivec4 %input1 %input0 + OpStore %output_ptr_uvec4 %result_iadd_0 + OpStore %output_ptr_uvec4 %result_iadd_1 + OpStore %output_ptr_uvec4 %result_iadd_2 + OpStore %output_ptr_uvec4 %result_iadd_3 + OpStore %output_ptr_ivec4 %result_iadd_4 + OpStore %output_ptr_ivec4 %result_iadd_5 + OpStore %output_ptr_ivec4 %result_iadd_6 + OpStore %output_ptr_ivec4 %result_iadd_7 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/block-name-alias-global.asm.comp b/shaders-opencl/asm/comp/block-name-alias-global.asm.comp new file mode 100644 index 000000000..85f6cc041 --- /dev/null +++ b/shaders-opencl/asm/comp/block-name-alias-global.asm.comp @@ -0,0 +1,119 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 59 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "A" + OpMemberName %Foo 0 "a" + OpMemberName %Foo 1 "b" + OpName %A "A" + OpMemberName %A 0 "Data" + OpName %C1 "C1" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %Foo_0 "A" + OpMemberName %Foo_0 0 "a" + OpMemberName %Foo_0 1 "b" + OpName %A_0 "A" + OpMemberName %A_0 0 "Data" + OpName %C2 "C2" + OpName %B "B" + OpMemberName %B 0 "Data" + OpName %C3 "C3" + OpName %B_0 "B" + OpMemberName %B_0 0 "Data" 
+ OpName %C4 "C4" + OpMemberDecorate %Foo 0 Offset 0 + OpMemberDecorate %Foo 1 Offset 4 + OpDecorate %_runtimearr_Foo ArrayStride 8 + OpMemberDecorate %A 0 Offset 0 + OpDecorate %A BufferBlock + OpDecorate %C1 DescriptorSet 0 + OpDecorate %C1 Binding 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %Foo_0 1 Offset 4 + OpDecorate %_arr_Foo_0_uint_1024 ArrayStride 16 + OpMemberDecorate %A_0 0 Offset 0 + OpDecorate %A_0 Block + OpDecorate %C2 DescriptorSet 0 + OpDecorate %C2 Binding 2 + OpDecorate %_runtimearr_Foo_0 ArrayStride 8 + OpMemberDecorate %B 0 Offset 0 + OpDecorate %B BufferBlock + OpDecorate %C3 DescriptorSet 0 + OpDecorate %C3 Binding 0 + OpDecorate %_arr_Foo_0_uint_1024_0 ArrayStride 16 + OpMemberDecorate %B_0 0 Offset 0 + OpDecorate %B_0 Block + OpDecorate %C4 DescriptorSet 0 + OpDecorate %C4 Binding 3 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %Foo = OpTypeStruct %int %int +%_runtimearr_Foo = OpTypeRuntimeArray %Foo + %A = OpTypeStruct %_runtimearr_Foo +%_ptr_Uniform_A = OpTypePointer Uniform %A + %C1 = OpVariable %_ptr_Uniform_A Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %Foo_0 = OpTypeStruct %int %int + %uint_1024 = OpConstant %uint 1024 +%_arr_Foo_0_uint_1024 = OpTypeArray %Foo_0 %uint_1024 + %A_0 = OpTypeStruct %_arr_Foo_0_uint_1024 +%_ptr_Uniform_A_0 = OpTypePointer Uniform %A_0 + %C2 = OpVariable %_ptr_Uniform_A_0 Uniform +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Uniform_Foo = OpTypePointer Uniform %Foo +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_1 = OpConstant %int 1 +%_runtimearr_Foo_0 = OpTypeRuntimeArray %Foo + %B = OpTypeStruct %_runtimearr_Foo_0 +%_ptr_Uniform_B = 
OpTypePointer Uniform %B + %C3 = OpVariable %_ptr_Uniform_B Uniform +%_arr_Foo_0_uint_1024_0 = OpTypeArray %Foo_0 %uint_1024 + %B_0 = OpTypeStruct %_arr_Foo_0_uint_1024_0 +%_ptr_Uniform_B_0 = OpTypePointer Uniform %B_0 + %C4 = OpVariable %_ptr_Uniform_B_0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %27 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %28 = OpLoad %uint %27 + %30 = OpAccessChain %_ptr_Uniform_Foo_0 %C2 %int_0 %28 + %31 = OpLoad %Foo_0 %30 + %33 = OpAccessChain %_ptr_Uniform_Foo %C1 %int_0 %20 + %34 = OpCompositeExtract %int %31 0 + %36 = OpAccessChain %_ptr_Uniform_int %33 %int_0 + OpStore %36 %34 + %37 = OpCompositeExtract %int %31 1 + %39 = OpAccessChain %_ptr_Uniform_int %33 %int_1 + OpStore %39 %37 + %44 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %45 = OpLoad %uint %44 + %50 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %51 = OpLoad %uint %50 + %52 = OpAccessChain %_ptr_Uniform_Foo_0 %C4 %int_0 %51 + %53 = OpLoad %Foo_0 %52 + %54 = OpAccessChain %_ptr_Uniform_Foo %C3 %int_0 %45 + %55 = OpCompositeExtract %int %53 0 + %56 = OpAccessChain %_ptr_Uniform_int %54 %int_0 + OpStore %56 %55 + %57 = OpCompositeExtract %int %53 1 + %58 = OpAccessChain %_ptr_Uniform_int %54 %int_1 + OpStore %58 %57 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp b/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp new file mode 100644 index 000000000..400690b04 --- /dev/null +++ b/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp @@ -0,0 +1,93 @@ +; SPIR-V +; Version: 1.0 +; Generator: Wine VKD3D Shader Compiler; 0 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability UniformBufferArrayDynamicIndexing + OpCapability SampledBuffer + OpCapability ImageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" 
%vThreadIDInGroup + OpExecutionMode %main LocalSize 4 1 1 + OpName %main "main" + OpName %cb5_struct "cb5_struct" + OpName %cb0_5 "cb0_5" + OpName %u0 "u0" + OpName %vThreadIDInGroup "vThreadIDInGroup" + OpName %r0 "r0" + OpDecorate %_arr_v4float_uint_5 ArrayStride 16 + OpDecorate %cb5_struct Block + OpMemberDecorate %cb5_struct 0 Offset 0 + OpDecorate %cb0_5 DescriptorSet 0 + OpDecorate %cb0_5 Binding 1 + OpDecorate %u0 DescriptorSet 0 + OpDecorate %u0 Binding 0 + OpDecorate %u0 NonReadable + OpDecorate %vThreadIDInGroup BuiltIn LocalInvocationId + %void = OpTypeVoid + %2 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %uint = OpTypeInt 32 0 + %uint_5 = OpConstant %uint 5 +%_arr_v4float_uint_5 = OpTypeArray %v4float %uint_5 + %cb5_struct = OpTypeStruct %_arr_v4float_uint_5 +%_ptr_Uniform_cb5_struct = OpTypePointer Uniform %cb5_struct + %cb0_5 = OpVariable %_ptr_Uniform_cb5_struct Uniform + %13 = OpTypeImage %uint Buffer 0 0 0 2 R32ui +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %u0 = OpVariable %_ptr_UniformConstant_13 UniformConstant + %int = OpTypeInt 32 1 + %v3int = OpTypeVector %int 3 +%_ptr_Input_v3int = OpTypePointer Input %v3int +%vThreadIDInGroup = OpVariable %_ptr_Input_v3int Input +%_ptr_Function_v4float = OpTypePointer Function %v4float +%_ptr_Input_int = OpTypePointer Input %int + %uint_0 = OpConstant %uint 0 + %int_4 = OpConstant %int 4 +%_ptr_Function_float = OpTypePointer Function %float + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %v4uint = OpTypeVector %uint 4 + %uint_3 = OpConstant %uint 3 + %main = OpFunction %void None %2 + %4 = OpLabel + %r0 = OpVariable %_ptr_Function_v4float Function + %24 = OpInBoundsAccessChain %_ptr_Input_int %vThreadIDInGroup %uint_0 + %25 = OpLoad %int %24 + %27 = OpShiftLeftLogical %int %25 %int_4 + %28 = OpBitcast %float %27 + %30 = OpInBoundsAccessChain %_ptr_Function_float %r0 %uint_0 + 
OpStore %30 %28 + %31 = OpInBoundsAccessChain %_ptr_Input_int %vThreadIDInGroup %uint_0 + %32 = OpLoad %int %31 + %33 = OpBitcast %float %32 + %35 = OpInBoundsAccessChain %_ptr_Function_float %r0 %uint_1 + OpStore %35 %33 + %36 = OpLoad %13 %u0 + %37 = OpInBoundsAccessChain %_ptr_Function_float %r0 %uint_0 + %38 = OpLoad %float %37 + %39 = OpBitcast %uint %38 + %41 = OpShiftRightLogical %uint %39 %uint_2 + %42 = OpInBoundsAccessChain %_ptr_Function_float %r0 %uint_1 + %43 = OpLoad %float %42 + %44 = OpBitcast %int %43 + %45 = OpIAdd %uint %44 %uint_1 + %47 = OpAccessChain %_ptr_Uniform_v4float %cb0_5 %uint_0 %45 + %48 = OpLoad %v4float %47 + %50 = OpBitcast %v4uint %48 + %51 = OpVectorShuffle %v4uint %50 %50 0 0 0 0 + OpImageWrite %36 %41 %51 + %52 = OpVectorShuffle %v4uint %50 %50 1 1 1 1 + %53 = OpIAdd %uint %41 %uint_1 + OpImageWrite %36 %53 %52 + %54 = OpVectorShuffle %v4uint %50 %50 2 2 2 2 + %55 = OpIAdd %uint %41 %uint_2 + OpImageWrite %36 %55 %54 + %56 = OpVectorShuffle %v4uint %50 %50 3 3 3 3 + %58 = OpIAdd %uint %41 %uint_3 + OpImageWrite %36 %58 %56 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/buffer-write.asm.comp b/shaders-opencl/asm/comp/buffer-write.asm.comp new file mode 100644 index 000000000..697324ba6 --- /dev/null +++ b/shaders-opencl/asm/comp/buffer-write.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 63 +; Schema: 0 + OpCapability Shader + OpCapability ImageBuffer + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %group_id %group_index + OpExecutionMode %main LocalSize 32 1 1 + OpSource HLSL 500 + OpName %main "main" + OpName %cb "cb" + OpMemberName %cb 0 "value" + OpName %_ "" + OpName %buffer "buffer" + OpName %group_id "group_id" + OpName %group_index "group_index" + OpMemberDecorate %cb 0 Offset 0 + OpDecorate %cb Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 7 + OpDecorate %buffer 
DescriptorSet 0 + OpDecorate %buffer Binding 0 + OpDecorate %group_id BuiltIn WorkgroupId + OpDecorate %group_index BuiltIn LocalInvocationIndex + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_32 = OpConstant %uint 32 + %float = OpTypeFloat 32 + %cb = OpTypeStruct %float +%_ptr_Uniform_cb = OpTypePointer Uniform %cb + %_ = OpVariable %_ptr_Uniform_cb Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %34 = OpTypeImage %float Buffer 0 0 0 2 R32f +%_ptr_UniformConstant_34 = OpTypePointer UniformConstant %34 + %buffer = OpVariable %_ptr_UniformConstant_34 UniformConstant +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %group_id = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint +%group_index = OpVariable %_ptr_Input_uint Input + %main = OpFunction %void None %3 + %5 = OpLabel + %43 = OpLoad %v3uint %group_id + %47 = OpLoad %uint %group_index + %56 = OpCompositeExtract %uint %43 0 + %57 = OpIMul %uint %uint_32 %56 + %59 = OpIAdd %uint %57 %47 + %60 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %61 = OpLoad %float %60 + %62 = OpLoad %34 %buffer + OpImageWrite %62 %59 %61 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 000000000..b01262f5b --- /dev/null +++ b/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 
+OpMemberDecorate %type_container_struct 1 Offset 4 +OpMemberDecorate %type_container_struct 2 Offset 8 +OpMemberDecorate %type_container_struct 3 Offset 12 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ssbo_ptr StorageBuffer +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ssbo_ptr %var_input +%result = OpLoad %type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 000000000..63df59ac3 --- /dev/null +++ b/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,43 @@ +OpCapability Shader +OpExtension "SPV_KHR_storage_buffer_storage_class" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %var_id +OpExecutionMode %main LocalSize 1 1 1 +OpDecorate %var_id BuiltIn GlobalInvocationId +OpDecorate %var_input Binding 0 +OpDecorate %var_input DescriptorSet 0 +OpDecorate %var_outdata Binding 1 +OpDecorate %var_outdata DescriptorSet 0 +OpMemberDecorate %type_container_struct 0 Offset 0 
+OpMemberDecorate %type_container_struct 1 Offset 16 +OpMemberDecorate %type_container_struct 2 Offset 32 +OpMemberDecorate %type_container_struct 3 Offset 48 +OpDecorate %type_container_struct Block +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%type_empty_struct = OpTypeStruct +%type_container_struct = OpTypeStruct %i32 %type_empty_struct %type_empty_struct %i32 +%type_container_struct_ubo_ptr = OpTypePointer Uniform %type_container_struct +%type_container_struct_ssbo_ptr = OpTypePointer StorageBuffer %type_container_struct +%var_id = OpVariable %uvec3ptr Input +%var_input = OpVariable %type_container_struct_ubo_ptr Uniform +%var_outdata = OpVariable %type_container_struct_ssbo_ptr StorageBuffer + +%main = OpFunction %void None %voidf +%label = OpLabel +%input_copy = OpCopyObject %type_container_struct_ubo_ptr %var_input +%result = OpLoad %type_container_struct %input_copy +OpStore %var_outdata %result +OpReturn +OpFunctionEnd diff --git a/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp b/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp new file mode 100644 index 000000000..4a5aa3d8b --- /dev/null +++ b/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp @@ -0,0 +1,54 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %StorageBuffer "StorageBuffer" + OpMemberName %StorageBuffer 0 "values" + OpName %ssbo "ssbo" + OpName 
%gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %foo "foo" + OpName %bar "bar" + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %StorageBuffer 0 Offset 0 + OpDecorate %StorageBuffer Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %foo SpecId 0 + OpDecorate %bar SpecId 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_runtimearr_float = OpTypeRuntimeArray %float +%StorageBuffer = OpTypeStruct %_runtimearr_float +%_ptr_StorageBuffer_StorageBuffer = OpTypePointer StorageBuffer %StorageBuffer + %ssbo = OpVariable %_ptr_StorageBuffer_StorageBuffer StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %foo = OpSpecConstant %int 1 + %bar = OpSpecConstant %float 2 +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpLoad %uint %gl_LocalInvocationIndex + %18 = OpConvertSToF %float %foo + %20 = OpFAdd %float %18 %bar + %22 = OpAccessChain %_ptr_StorageBuffer_float %ssbo %int_0 %16 + OpStore %22 %20 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/fma.spv16.asm.comp b/shaders-opencl/asm/comp/fma.spv16.asm.comp new file mode 100644 index 000000000..ec57f19fe --- /dev/null +++ b/shaders-opencl/asm/comp/fma.spv16.asm.comp @@ -0,0 +1,65 @@ +; SPIR-V +; Version: 1.6 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpCapability FMAKHR + OpExtension "SPV_KHR_fma" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute 
%main "main" %_ %__0 + OpExecutionMode %main LocalSize 1 1 1 + OpSource ESSL 310 + OpName %main "main" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "out_data" + OpName %_ "" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "in_data" + OpName %__0 "" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpDecorate %SSBO2 Block + OpMemberDecorate %SSBO2 0 NonReadable + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %_ NonReadable + OpDecorate %_ Binding 1 + OpDecorate %_ DescriptorSet 0 + OpDecorate %_runtimearr_v4float_0 ArrayStride 16 + OpDecorate %SSBO Block + OpMemberDecorate %SSBO 0 NonWritable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %__0 NonWritable + OpDecorate %__0 Binding 0 + OpDecorate %__0 DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO2 = OpTypeStruct %_runtimearr_v4float +%_ptr_StorageBuffer_SSBO2 = OpTypePointer StorageBuffer %SSBO2 + %_ = OpVariable %_ptr_StorageBuffer_SSBO2 StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_runtimearr_v4float_0 = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float_0 +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %__0 = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_StorageBuffer_v4float = OpTypePointer StorageBuffer %v4float + %int_1 = OpConstant %int 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 + %29 = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpAccessChain %_ptr_StorageBuffer_v4float %__0 %int_0 %int_0 + %20 = OpLoad %v4float %19 + %22 = OpAccessChain %_ptr_StorageBuffer_v4float %__0 %int_0 %int_1 + %23 = OpLoad %v4float %22 + %24 = OpFmaKHR %v4float %20 %23 %23 + %25 = OpAccessChain %_ptr_StorageBuffer_v4float %_ %int_0 %int_0 + OpStore %25 %24 + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp b/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp new file mode 100644 index 000000000..78b1dc74e --- /dev/null +++ b/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 61 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %id_1 + OpExecutionMode %main LocalSize 1 1 1 + OpSource HLSL 500 + OpName %main "main" + OpName %Load_u1_ "Load(u1;" + OpName %size "size" + OpName %_main_vu3_ "@main(vu3;" + OpName %id "id" + OpName %data "data" + OpName %byteAddrTemp "byteAddrTemp" + OpName %ssbo "ssbo" + OpMemberName %ssbo 0 "@data" + OpName %ssbo_0 "ssbo" + OpName %param "param" + OpName %id_0 "id" + OpName %id_1 "id" + OpName %param_0 "param" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %ssbo 0 NonWritable + OpMemberDecorate %ssbo 0 Offset 0 + OpDecorate %ssbo BufferBlock + OpDecorate %ssbo_0 DescriptorSet 0 + OpDecorate %ssbo_0 Binding 1 + OpDecorate %id_1 BuiltIn GlobalInvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %8 = OpTypeFunction %void %_ptr_Function_uint + %v3uint = OpTypeVector %uint 3 +%_ptr_Function_v3uint = OpTypePointer Function %v3uint + %14 = OpTypeFunction %void %_ptr_Function_v3uint + %v4uint = OpTypeVector %uint 4 +%_ptr_Function_v4uint = OpTypePointer Function %v4uint + %int = OpTypeInt 32 1 +%_ptr_Function_int = OpTypePointer Function %int + %int_2 = OpConstant %int 2 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %ssbo = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_ssbo = OpTypePointer Uniform %ssbo + %ssbo_0 = OpVariable %_ptr_Uniform_ssbo Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_1 = OpConstant %int 1 + 
%int_3 = OpConstant %int 3 + %uint_4 = OpConstant %uint 4 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %id_1 = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %3 + %5 = OpLabel + %id_0 = OpVariable %_ptr_Function_v3uint Function + %param_0 = OpVariable %_ptr_Function_v3uint Function + %57 = OpLoad %v3uint %id_1 + OpStore %id_0 %57 + %59 = OpLoad %v3uint %id_0 + OpStore %param_0 %59 + %60 = OpFunctionCall %void %_main_vu3_ %param_0 + OpReturn + OpFunctionEnd + %Load_u1_ = OpFunction %void None %8 + %size = OpFunctionParameter %_ptr_Function_uint + %11 = OpLabel + %data = OpVariable %_ptr_Function_v4uint Function +%byteAddrTemp = OpVariable %_ptr_Function_int Function + %24 = OpLoad %uint %size + %26 = OpShiftRightLogical %int %24 %int_2 + OpStore %byteAddrTemp %26 + %32 = OpLoad %int %byteAddrTemp + %34 = OpAccessChain %_ptr_Uniform_uint %ssbo_0 %int_0 %32 + %35 = OpLoad %uint %34 + %36 = OpLoad %int %byteAddrTemp + %38 = OpIAdd %int %36 %int_1 + %39 = OpAccessChain %_ptr_Uniform_uint %ssbo_0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpLoad %int %byteAddrTemp + %42 = OpIAdd %int %41 %int_2 + %43 = OpAccessChain %_ptr_Uniform_uint %ssbo_0 %int_0 %42 + %44 = OpLoad %uint %43 + %45 = OpLoad %int %byteAddrTemp + %47 = OpIAdd %int %45 %int_3 + %48 = OpAccessChain %_ptr_Uniform_uint %ssbo_0 %int_0 %47 + %49 = OpLoad %uint %48 + %50 = OpCompositeConstruct %v4uint %35 %40 %44 %49 + OpStore %data %50 + OpReturn + OpFunctionEnd + %_main_vu3_ = OpFunction %void None %14 + %id = OpFunctionParameter %_ptr_Function_v3uint + %17 = OpLabel + %param = OpVariable %_ptr_Function_uint Function + OpStore %param %uint_4 + %53 = OpFunctionCall %void %Load_u1_ %param + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp new file mode 100644 index 000000000..8f759293e --- /dev/null +++ 
b/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -0,0 +1,75 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 44 +; Schema: 0 + OpCapability Shader + OpCapability StorageImageExtendedFormats + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %id_1 + OpExecutionMode %main LocalSize 1 1 1 + OpSource HLSL 500 + OpName %main "main" + OpName %_main_vu3_ "@main(vu3;" + OpName %id "id" + OpName %loaded "loaded" + OpName %TargetTexture "TargetTexture" + OpName %storeTemp "storeTemp" + OpName %id_0 "id" + OpName %id_1 "id" + OpName %param "param" + OpDecorate %TargetTexture DescriptorSet 0 + OpDecorate %TargetTexture Binding 0 + OpDecorate %id_1 BuiltIn WorkgroupId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Function_v3uint = OpTypePointer Function %v3uint + %9 = OpTypeFunction %void %_ptr_Function_v3uint + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %17 = OpTypeImage %float 2D 0 0 0 2 Rg32f +%_ptr_UniformConstant_17 = OpTypePointer UniformConstant %17 +%TargetTexture = OpVariable %_ptr_UniformConstant_17 UniformConstant + %v2uint = OpTypeVector %uint 2 + %float_1 = OpConstant %float 1 + %uint_1 = OpConstant %uint 1 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %id_1 = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %3 + %5 = OpLabel + %id_0 = OpVariable %_ptr_Function_v3uint Function + %param = OpVariable %_ptr_Function_v3uint Function + %40 = OpLoad %v3uint %id_1 + OpStore %id_0 %40 + %42 = OpLoad %v3uint %id_0 + OpStore %param %42 + %43 = OpFunctionCall %void %_main_vu3_ %param + OpReturn + OpFunctionEnd + %_main_vu3_ = OpFunction %void None %9 + %id = OpFunctionParameter %_ptr_Function_v3uint + %12 = OpLabel + %loaded = OpVariable %_ptr_Function_v2float Function + %storeTemp = 
OpVariable %_ptr_Function_v2float Function + %20 = OpLoad %17 %TargetTexture + %22 = OpLoad %v3uint %id + %23 = OpVectorShuffle %v2uint %22 %22 0 1 + %24 = OpImageRead %v2float %20 %23 + OpStore %loaded %24 + %26 = OpLoad %v2float %loaded + %28 = OpCompositeConstruct %v2float %float_1 %float_1 + %29 = OpFAdd %v2float %26 %28 + OpStore %storeTemp %29 + %30 = OpLoad %17 %TargetTexture + %31 = OpLoad %v3uint %id + %32 = OpVectorShuffle %v2uint %31 %31 0 1 + %34 = OpCompositeConstruct %v2uint %uint_1 %uint_1 + %35 = OpIAdd %v2uint %32 %34 + %36 = OpLoad %v2float %storeTemp + OpImageWrite %30 %35 %36 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 000000000..65a7eedd9 --- /dev/null +++ b/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,107 @@ +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %main "main" %id +OpExecutionMode %main LocalSize 1 1 1 +OpName %main "main" +OpName %id "gl_GlobalInvocationID" +OpDecorate %id BuiltIn GlobalInvocationId +OpDecorate %sc_0 SpecId 0 +OpDecorate %sc_1 SpecId 1 +OpDecorate %sc_2 SpecId 2 +OpDecorate %i32arr ArrayStride 4 +OpDecorate %buf BufferBlock +OpDecorate %indata DescriptorSet 0 +OpDecorate %indata Binding 0 +OpDecorate %outdata DescriptorSet 0 +OpDecorate %outdata Binding 1 +OpDecorate %f32arr ArrayStride 4 +OpMemberDecorate %buf 0 Offset 0 +%bool = OpTypeBool +%void = OpTypeVoid +%voidf = OpTypeFunction %void +%u32 = OpTypeInt 32 0 +%i32 = OpTypeInt 32 1 +%f32 = OpTypeFloat 32 +%uvec3 = OpTypeVector %u32 3 +%fvec3 = OpTypeVector %f32 3 +%uvec3ptr = OpTypePointer Input %uvec3 +%i32ptr = OpTypePointer Uniform %i32 +%f32ptr = OpTypePointer Uniform %f32 +%i32arr = OpTypeRuntimeArray %i32 +%f32arr = OpTypeRuntimeArray %f32 +%ivec3 = OpTypeVector %i32 3 +%zero = OpConstant %i32 0 +%one = OpConstant %i32 1 +%two = 
OpConstant %i32 2 +%three = OpConstant %i32 3 +%iarr3 = OpTypeArray %i32 %three +%imat3 = OpTypeArray %iarr3 %three +%struct = OpTypeStruct %imat3 +%buf = OpTypeStruct %i32arr +%bufptr = OpTypePointer Uniform %buf +%indata = OpVariable %bufptr Uniform +%outdata = OpVariable %bufptr Uniform +%id = OpVariable %uvec3ptr Input +%ivec3_0 = OpConstantComposite %ivec3 %zero %zero %zero +%vec3_undef = OpUndef %ivec3 +%iarr3_0 = OpConstantComposite %iarr3 %zero %zero %zero +%imat3_0 = OpConstantComposite %imat3 %iarr3_0 %iarr3_0 %iarr3_0 +%struct_0 = OpConstantComposite %struct %imat3_0 +%sc_0 = OpSpecConstant %i32 0 +%sc_1 = OpSpecConstant %i32 0 +%sc_2 = OpSpecConstant %i32 0 +%iarr3_a = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_0 0 +%iarr3_b = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_a 1 +%iarr3_c = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_b 2 +%iarr3_d = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_0 0 +%iarr3_e = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_d 1 +%iarr3_f = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_e 2 +%iarr3_g = OpSpecConstantOp %iarr3 CompositeInsert %sc_2 %iarr3_0 0 +%iarr3_h = OpSpecConstantOp %iarr3 CompositeInsert %sc_0 %iarr3_g 1 +%iarr3_i = OpSpecConstantOp %iarr3 CompositeInsert %sc_1 %iarr3_h 2 +%imat3_a = OpSpecConstantOp %imat3 CompositeInsert %iarr3_c %imat3_0 0 +%imat3_b = OpSpecConstantOp %imat3 CompositeInsert %iarr3_f %imat3_a 1 +%imat3_c = OpSpecConstantOp %imat3 CompositeInsert %iarr3_i %imat3_b 2 +%struct_a = OpSpecConstantOp %struct CompositeInsert %imat3_c %struct_0 0 +%struct_b = OpSpecConstantOp %struct CompositeInsert %sc_2 %struct_a 0 1 2 +%comp_0_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 0 +%comp_1_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 0 +%comp_0_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 0 1 +%comp_2_2 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 2 2 +%comp_2_0 = OpSpecConstantOp %i32 CompositeExtract %struct_a 
0 2 0 +%comp_1_1 = OpSpecConstantOp %i32 CompositeExtract %struct_a 0 1 1 +%cmpres_0 = OpSpecConstantOp %bool IEqual %comp_0_0 %comp_1_0 +%cmpres_1 = OpSpecConstantOp %bool IEqual %comp_0_1 %comp_2_2 +%cmpres_2 = OpSpecConstantOp %bool IEqual %comp_2_0 %comp_1_1 +%mustbe_0 = OpSpecConstantOp %i32 Select %cmpres_0 %one %zero +%mustbe_1 = OpSpecConstantOp %i32 Select %cmpres_1 %one %zero +%mustbe_2 = OpSpecConstantOp %i32 Select %cmpres_2 %two %one +%sc_vec3_0 = OpSpecConstantOp %ivec3 CompositeInsert %sc_0 %ivec3_0 0 +%sc_vec3_1 = OpSpecConstantOp %ivec3 CompositeInsert %sc_1 %ivec3_0 1 +%sc_vec3_2 = OpSpecConstantOp %ivec3 CompositeInsert %sc_2 %ivec3_0 2 +%sc_vec3_0_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0 %vec3_undef 0 0xFFFFFFFF 2 +%sc_vec3_1_s = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_1 %vec3_undef 0xFFFFFFFF 1 0 +%sc_vec3_2_s = OpSpecConstantOp %ivec3 VectorShuffle %vec3_undef %sc_vec3_2 5 0xFFFFFFFF 5 +%sc_vec3_01 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_0_s %sc_vec3_1_s 1 0 4 +%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle %sc_vec3_01 %sc_vec3_2_s 5 1 2 +%sc_ext_0 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 0 +%sc_ext_1 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 1 +%sc_ext_2 = OpSpecConstantOp %i32 CompositeExtract %sc_vec3_012 2 +%sc_sub = OpSpecConstantOp %i32 ISub %sc_ext_0 %sc_ext_1 +%sc_factor = OpSpecConstantOp %i32 IMul %sc_sub %sc_ext_2 +%main = OpFunction %void None %voidf +%label = OpLabel +%subf_a = OpISub %i32 %one %mustbe_0 +%subf_b = OpIMul %i32 %subf_a %mustbe_1 +%subf_c = OpISub %i32 %mustbe_2 %one +%factor = OpIMul %i32 %subf_b %subf_c +%sc_final = OpIMul %i32 %factor %sc_factor +%idval = OpLoad %uvec3 %id +%x = OpCompositeExtract %u32 %idval 0 +%inloc = OpAccessChain %i32ptr %indata %zero %x +%inval = OpLoad %i32 %inloc +%final = OpIAdd %i32 %inval %sc_final +%outloc = OpAccessChain %i32ptr %outdata %zero %x + OpStore %outloc %final + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl/asm/comp/quantize.asm.comp b/shaders-opencl/asm/comp/quantize.asm.comp new file mode 100644 index 000000000..f5afc6570 --- /dev/null +++ b/shaders-opencl/asm/comp/quantize.asm.comp @@ -0,0 +1,67 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 38 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %4 "main" + OpExecutionMode %4 LocalSize 1 1 1 + OpSource ESSL 310 + OpName %4 "main" + OpName %10 "SSBO0" + OpMemberName %10 0 "scalar" + OpMemberName %10 1 "vec2_val" + OpMemberName %10 2 "vec3_val" + OpMemberName %10 3 "vec4_val" + OpName %12 "" + OpMemberDecorate %10 0 Offset 0 + OpMemberDecorate %10 1 Offset 8 + OpMemberDecorate %10 2 Offset 16 + OpMemberDecorate %10 3 Offset 32 + OpDecorate %10 BufferBlock + OpDecorate %12 DescriptorSet 0 + OpDecorate %12 Binding 0 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 + %7 = OpTypeVector %6 2 + %8 = OpTypeVector %6 3 + %9 = OpTypeVector %6 4 + %10 = OpTypeStruct %6 %7 %8 %9 + %11 = OpTypePointer Uniform %10 + %12 = OpVariable %11 Uniform + %13 = OpTypeInt 32 1 + %14 = OpConstant %13 0 + %15 = OpTypePointer Uniform %6 + %20 = OpConstant %13 1 + %21 = OpTypePointer Uniform %7 + %26 = OpConstant %13 2 + %27 = OpTypePointer Uniform %8 + %32 = OpConstant %13 3 + %33 = OpTypePointer Uniform %9 + %4 = OpFunction %2 None %3 + %5 = OpLabel + %16 = OpAccessChain %15 %12 %14 + %17 = OpLoad %6 %16 + %18 = OpQuantizeToF16 %6 %17 + %19 = OpAccessChain %15 %12 %14 + OpStore %19 %18 + %22 = OpAccessChain %21 %12 %20 + %23 = OpLoad %7 %22 + %24 = OpQuantizeToF16 %7 %23 + %25 = OpAccessChain %21 %12 %20 + OpStore %25 %24 + %28 = OpAccessChain %27 %12 %26 + %29 = OpLoad %8 %28 + %30 = OpQuantizeToF16 %8 %29 + %31 = OpAccessChain %27 %12 %26 + OpStore %31 %30 + %34 = OpAccessChain %33 %12 %32 + %35 = OpLoad %9 %34 + %36 = OpQuantizeToF16 %9 %35 + %37 = OpAccessChain %33 %12 %32 + OpStore 
%37 %36 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp b/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp new file mode 100644 index 000000000..dd909426d --- /dev/null +++ b/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp @@ -0,0 +1,108 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 63 +; Schema: 0 + OpCapability Shader + OpCapability StorageBuffer16BitAccess + OpCapability StorageBuffer8BitAccess + OpCapability UniformAndStorageBuffer8BitAccess + OpExtension "SPV_KHR_8bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID %gl_GlobalInvocationID %gl_WorkGroupID %gl_NumWorkGroups + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_16bit_storage" + OpSourceExtension "GL_EXT_shader_8bit_storage" + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "bar" + OpMemberName %foo 1 "baz" + OpMemberName %foo 2 "quux" + OpMemberName %foo 3 "blah" + OpMemberName %foo 4 "wibble" + OpName %_ "" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_WorkGroupID "gl_WorkGroupID" + OpName %gl_NumWorkGroups "gl_NumWorkGroups" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %foo 1 Offset 4 + OpMemberDecorate %foo 2 Offset 16 + OpMemberDecorate %foo 3 Offset 17 + OpMemberDecorate %foo 4 Offset 22 + OpDecorate %foo BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + OpDecorate %gl_NumWorkGroups BuiltIn NumWorkgroups + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %uchar = OpTypeInt 8 0 + %v4uchar 
= OpTypeVector %uchar 4 + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %foo = OpTypeStruct %uint %v3float %uchar %v4uchar %v2half +%_ptr_Uniform_foo = OpTypePointer Uniform %foo + %_ = OpVariable %_ptr_Uniform_foo Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_1 = OpConstant %int 1 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Uniform_v3float = OpTypePointer Uniform %v3float + %int_3 = OpConstant %int 3 +%_ptr_Uniform_v4uchar = OpTypePointer Uniform %v4uchar + %v4uint = OpTypeVector %uint 4 +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %int_4 = OpConstant %int 4 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %v2float = OpTypeVector %float 2 +%gl_NumWorkGroups = OpVariable %_ptr_Input_v3uint Input + %v2uint = OpTypeVector %uint 2 + %main = OpFunction %void None %3 + %5 = OpLabel + %23 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %24 = OpLoad %uint %23 + %26 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpStore %26 %24 + %29 = OpLoad %v3uint %gl_GlobalInvocationID + %30 = OpConvertUToF %v3float %29 + %32 = OpAccessChain %_ptr_Uniform_v3float %_ %int_1 + OpStore %32 %30 + %35 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3 + %36 = OpLoad %v4uchar %35 + %38 = OpUConvert %v4uint %36 + %39 = OpVectorShuffle %v3uint %38 %38 0 1 2 + %41 = OpLoad %v3uint %gl_WorkGroupID + %42 = OpIAdd %v3uint %39 %41 + %43 = OpCompositeExtract %uint %42 0 + %44 = OpCompositeExtract %uint %42 1 + %45 = OpCompositeExtract %uint %42 2 + %46 = OpCompositeConstruct %v4uint %43 %44 %45 %uint_0 + %47 = OpUConvert %v4uchar %46 + %48 = OpAccessChain %_ptr_Uniform_v4uchar %_ %int_3 + OpStore %48 %47 + %51 = OpAccessChain 
%_ptr_Uniform_v2half %_ %int_4 + %52 = OpLoad %v2half %51 + %54 = OpFConvert %v2float %52 + %57 = OpLoad %v3uint %gl_NumWorkGroups + %58 = OpVectorShuffle %v2uint %57 %57 0 1 + %59 = OpConvertUToF %v2float %58 + %60 = OpFMul %v2float %54 %59 + %61 = OpFConvert %v2half %60 + %62 = OpAccessChain %_ptr_Uniform_v2half %_ %int_4 + OpStore %62 %61 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp b/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp new file mode 100644 index 000000000..f6f699e74 --- /dev/null +++ b/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.6 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 41 +; Schema: 0 + OpCapability Shader + OpCapability ReplicatedCompositesEXT + OpExtension "SPV_EXT_replicated_composites" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ubo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_float16" + OpSourceExtension "GL_KHR_memory_scope_semantics" + OpName %main "main" + OpName %a "a" + OpName %b "b" + OpName %c "c" + OpName %spec_const "spec_const" + OpName %array "array" + OpName %d "d" + OpName %UBO "UBO" + OpMemberName %UBO 0 "uniform_float" + OpName %ubo "ubo" + OpName %e "e" + OpName %f "f" + OpDecorate %spec_const SpecId 0 + OpDecorate %UBO Block + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %ubo Binding 0 + OpDecorate %ubo DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Function_v4float = OpTypePointer Function %v4float + %float_0 = OpConstant %float 0 + %11 = OpConstantCompositeReplicateEXT %v4float %float_0 +%mat4v4float = OpTypeMatrix %v4float 4 +%_ptr_Function_mat4v4float = OpTypePointer Function %mat4v4float + %float_1 = OpConstant %float 1 + %16 = 
OpConstantCompositeReplicateEXT %v4float %float_1 + %17 = OpConstantCompositeReplicateEXT %mat4v4float %16 + %spec_const = OpSpecConstant %float 0 + %20 = OpSpecConstantCompositeReplicateEXT %v4float %spec_const + %uint = OpTypeInt 32 0 + %uint_8 = OpConstant %uint 8 +%_arr_float_uint_8 = OpTypeArray %float %uint_8 +%_ptr_Function__arr_float_uint_8 = OpTypePointer Function %_arr_float_uint_8 + %26 = OpConstantCompositeReplicateEXT %_arr_float_uint_8 %float_1 + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_v4float Function + %b = OpVariable %_ptr_Function_mat4v4float Function + %c = OpVariable %_ptr_Function_v4float Function + %array = OpVariable %_ptr_Function__arr_float_uint_8 Function + %d = OpVariable %_ptr_Function_v4float Function + %e = OpVariable %_ptr_Function_mat4v4float Function + %f = OpVariable %_ptr_Function__arr_float_uint_8 Function + OpStore %a %11 + OpStore %b %17 + OpStore %c %20 + OpStore %array %26 + %34 = OpAccessChain %_ptr_Uniform_float %ubo %int_0 + %35 = OpLoad %float %34 + %36 = OpCompositeConstructReplicateEXT %v4float %35 + OpStore %d %36 + %38 = OpLoad %v4float %d + %39 = OpCompositeConstructReplicateEXT %mat4v4float %38 + OpStore %e %39 + %40 = OpCompositeConstructReplicateEXT %_arr_float_uint_8 %35 + OpStore %f %40 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp b/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp new file mode 100644 index 000000000..188e3fec3 --- /dev/null +++ b/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 1 +; Bound: 24 +; Schema: 0 + OpCapability Shader + %1 = 
OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 20 1 + OpSource ESSL 310 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %19 SpecId 10 + OpDecorate %21 SpecId 12 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %uint = OpTypeInt 32 0 + %19 = OpSpecConstant %uint 9 + %uint_20 = OpConstant %uint 20 + %21 = OpSpecConstant %uint 4 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %19 %uint_20 %21 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %15 = OpLoad %float %14 + %16 = OpFAdd %float %15 %float_1 + %17 = OpAccessChain %_ptr_Uniform_float %_ %int_0 + OpStore %17 %16 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp b/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp new file mode 100644 index 000000000..384da305a --- /dev/null +++ b/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp @@ -0,0 +1,49 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 21 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 8 8 1 + OpSource HLSL 500 + OpName %main "main" + OpName %_main_ "@main(" + OpName %bufA "bufA" + OpMemberName %bufA 0 "@data" + OpName 
%bufA_0 "bufA" + OpName %bufB "bufB" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %bufA 0 Offset 0 + OpDecorate %bufA BufferBlock + OpDecorate %bufA_0 DescriptorSet 0 + OpDecorate %bufB DescriptorSet 0 + OpDecorate %bufA_0 Binding 0 + OpDecorate %bufB Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %bufA = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_bufA = OpTypePointer Uniform %bufA + %bufA_0 = OpVariable %_ptr_Uniform_bufA Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_0 = OpConstant %uint 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bufB = OpVariable %_ptr_Uniform_bufA Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpFunctionCall %void %_main_ + OpReturn + OpFunctionEnd + %_main_ = OpFunction %void None %3 + %7 = OpLabel + %17 = OpAccessChain %_ptr_Uniform_uint %bufA_0 %int_0 %int_0 + OpStore %17 %uint_0 + %19 = OpAccessChain %_ptr_Uniform_uint %bufB %int_0 %int_0 + OpStore %19 %uint_0 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/uint_smulextended.asm.comp b/shaders-opencl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 000000000..32d483636 --- /dev/null +++ b/shaders-opencl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,61 @@ + OpCapability Shader + + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationId + OpExecutionMode %main LocalSize 1 1 1 + + OpDecorate %gl_GlobalInvocationId BuiltIn GlobalInvocationId + OpDecorate %ra_uint ArrayStride 4 + OpDecorate %struct_uint4 BufferBlock + OpMemberDecorate %struct_uint4 0 Offset 0 + OpDecorate %input0 DescriptorSet 0 + OpDecorate %input0 Binding 0 + OpDecorate %input1 DescriptorSet 0 + OpDecorate %input1 Binding 1 + OpDecorate %output0 DescriptorSet 0 + OpDecorate %output0 Binding 2 + OpDecorate %output1 DescriptorSet 0 + OpDecorate %output1 Binding 3 + + %uint = OpTypeInt 32 0 + %ptr_uint = 
OpTypePointer Uniform %uint + %ptr_input_uint = OpTypePointer Input %uint + %uint3 = OpTypeVector %uint 3 + %ptr_input_uint3 = OpTypePointer Input %uint3 + %void = OpTypeVoid + %voidFn = OpTypeFunction %void + + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %ra_uint = OpTypeRuntimeArray %uint + %uint4 = OpTypeVector %uint 4 + %struct_uint4 = OpTypeStruct %ra_uint + %ptr_struct_uint4 = OpTypePointer Uniform %struct_uint4 + %resulttype = OpTypeStruct %uint %uint +%gl_GlobalInvocationId = OpVariable %ptr_input_uint3 Input + %input0 = OpVariable %ptr_struct_uint4 Uniform + %input1 = OpVariable %ptr_struct_uint4 Uniform + + %output0 = OpVariable %ptr_struct_uint4 Uniform + %output1 = OpVariable %ptr_struct_uint4 Uniform + + %main = OpFunction %void None %voidFn + %mainStart = OpLabel + %index_ptr = OpAccessChain %ptr_input_uint %gl_GlobalInvocationId %uint_0 + %index = OpLoad %uint %index_ptr + %in_ptr0 = OpAccessChain %ptr_uint %input0 %uint_0 %index + %invalue0 = OpLoad %uint %in_ptr0 + %in_ptr1 = OpAccessChain %ptr_uint %input1 %uint_0 %index + %invalue1 = OpLoad %uint %in_ptr1 + + %outvalue = OpSMulExtended %resulttype %invalue0 %invalue1 + %outvalue0 = OpCompositeExtract %uint %outvalue 0 + %out_ptr0 = OpAccessChain %ptr_uint %output0 %uint_0 %index + OpStore %out_ptr0 %outvalue0 + %outvalue1 = OpCompositeExtract %uint %outvalue 1 + %out_ptr1 = OpAccessChain %ptr_uint %output1 %uint_0 %index + OpStore %out_ptr1 %outvalue1 + + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp b/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 000000000..9de0501fe --- /dev/null +++ b/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,102 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout(set=0, binding=0, 
std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_second (int value, Pair pair) { +; return value + pair.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_second(inputValues.array[idx], constant_pair); +; } +; +; However, the first element of constant_pair has been modified to be undefined. +; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair + %add_second_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 +%_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + 
%InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a constant Pair with the undefined int in the first member. + %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %const_Pair + %retval = OpFunctionCall %int %add_second %param_1 %param_2 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_second = OpFunction %int None %add_second_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair = OpFunctionParameter %_ptr_Function_Pair + %add_second_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the second struct member, which is defined. 
+ %pair_second_ptr = OpAccessChain %_ptr_Function_int %pair %int_1 + %pair_second = OpLoad %int %pair_second_ptr + %add_result = OpIAdd %int %value %pair_second + OpReturnValue %add_result + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp b/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 000000000..d89a402bf --- /dev/null +++ b/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp @@ -0,0 +1,122 @@ +; +; The shader below is based on the following GLSL shader: +; +; #version 450 +; +; struct Pair { +; int first; +; int second; +; }; +; +; const Pair constant_pair = { 100, 200 }; +; +; layout (constant_id=0) const int constantFirst = 0; +; +; Pair spec_constant_pair = { constantFirst, 200 }; +; +; layout(set=0, binding=0, std430) buffer InputBlock { +; int array[10]; +; } inputValues; +; +; layout(set=0, binding=1, std430) buffer OutputBlock { +; int array[10]; +; } outputValues; +; +; int add_first_and_second (int value, Pair p1, Pair p2) { +; return value + p1.first + p2.second; +; } +; +; void main() { +; uint idx = gl_GlobalInvocationID.x; +; outputValues.array[idx] = add_first_and_second(inputValues.array[idx], spec_constant_pair, constant_pair); +; } +; +; However, both the constant_pair and the spec_constant_pair have one of their members replaced by undefined values. 
+; + OpCapability Shader + %std450 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %_arr_int_uint_10 ArrayStride 4 + OpMemberDecorate %OutputBlock 0 Offset 0 + OpDecorate %OutputBlock BufferBlock + OpDecorate %outputValues DescriptorSet 0 + OpDecorate %outputValues Binding 1 + OpMemberDecorate %InputBlock 0 Offset 0 + OpDecorate %InputBlock BufferBlock + OpDecorate %inputValues DescriptorSet 0 + OpDecorate %inputValues Binding 0 + OpDecorate %spec_constant SpecId 0 + %void = OpTypeVoid + %void_func = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %int_200 = OpConstant %int 200 + %uint_0 = OpConstant %uint 0 + %uint_10 = OpConstant %uint 10 + %_ptr_Function_int = OpTypePointer Function %int + %Pair = OpTypeStruct %int %int + %_ptr_Function_Pair = OpTypePointer Function %Pair +%add_pair_members_func_type = OpTypeFunction %int %_ptr_Function_int %_ptr_Function_Pair %_ptr_Function_Pair + %_ptr_Function_uint = OpTypePointer Function %uint + %_ptr_Input_v3uint = OpTypePointer Input %v3uint + %_ptr_Input_uint = OpTypePointer Input %uint + %_arr_int_uint_10 = OpTypeArray %int %uint_10 + %OutputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_OutputBlock = OpTypePointer Uniform %OutputBlock + %outputValues = OpVariable %_ptr_Uniform_OutputBlock Uniform + %InputBlock = OpTypeStruct %_arr_int_uint_10 + %_ptr_Uniform_InputBlock = OpTypePointer Uniform %InputBlock + %inputValues = OpVariable %_ptr_Uniform_InputBlock Uniform + ; Replaced %int_100 with an undefined int. + %undef_int = OpUndef %int + ; Composed a spec constant Pair with an undefined int in the second member. 
+ %spec_constant = OpSpecConstant %int 0 + %spec_const_Pair = OpSpecConstantComposite %Pair %spec_constant %undef_int + ; Composed a constant Pair with the undefined int in the first member. + %const_Pair = OpConstantComposite %Pair %undef_int %int_200 + %_ptr_Uniform_int = OpTypePointer Uniform %int + %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %main = OpFunction %void None %void_func + %main_label = OpLabel + %param_1 = OpVariable %_ptr_Function_int Function + %param_2 = OpVariable %_ptr_Function_Pair Function + %param_3 = OpVariable %_ptr_Function_Pair Function + %gidx_ptr = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %gidx = OpLoad %uint %gidx_ptr + %input_value_ptr = OpAccessChain %_ptr_Uniform_int %inputValues %int_0 %gidx + %input_value = OpLoad %int %input_value_ptr + OpStore %param_1 %input_value + OpStore %param_2 %spec_const_Pair + OpStore %param_3 %const_Pair + ; Pass the input value as the first argument. + ; Pass the specialization constant Pair as the second argument. + ; Pass the constant Pair as the third argument. + %retval = OpFunctionCall %int %add_pair_members %param_1 %param_2 %param_3 + %output_value_ptr = OpAccessChain %_ptr_Uniform_int %outputValues %int_0 %gidx + OpStore %output_value_ptr %retval + OpReturn + OpFunctionEnd + %add_pair_members = OpFunction %int None %add_pair_members_func_type + %value_ptr = OpFunctionParameter %_ptr_Function_int + %pair_1 = OpFunctionParameter %_ptr_Function_Pair + %pair_2 = OpFunctionParameter %_ptr_Function_Pair + %add_pair_members_label = OpLabel + %value = OpLoad %int %value_ptr + ; Access the first struct member from the first pair. + ; Access the second struct member from the second pair. + ; Both should be defined according to the function call above. 
+ %pair_1_first_ptr = OpAccessChain %_ptr_Function_int %pair_1 %int_0 + %pair_2_second_ptr = OpAccessChain %_ptr_Function_int %pair_2 %int_1 + %pair_1_first = OpLoad %int %pair_1_first_ptr + %pair_2_second = OpLoad %int %pair_2_second_ptr + %partial_result = OpIAdd %int %value %pair_1_first + %final_result = OpIAdd %int %partial_result %pair_2_second + OpReturnValue %final_result + OpFunctionEnd + diff --git a/shaders-opencl/asm/comp/variable-pointers-2.asm.comp b/shaders-opencl/asm/comp/variable-pointers-2.asm.comp new file mode 100644 index 000000000..308162f0b --- /dev/null +++ b/shaders-opencl/asm/comp/variable-pointers-2.asm.comp @@ -0,0 +1,117 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 65 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "a" + OpMemberName %foo 1 "b" + OpMemberName %foo 2 "c" + OpName %bar "bar" + OpMemberName %bar 0 "d" + OpName %buf "buf" + OpName %cb "cb" + OpName %select_buffer "select_buffer" + OpName %select_input "select_input" + OpName %a "a" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %foo 1 Offset 512 + OpMemberDecorate %foo 2 Offset 520 + OpMemberDecorate %bar 0 Offset 0 + OpDecorate %foo Block + OpDecorate %bar Block + OpDecorate %buf DescriptorSet 0 + OpDecorate %buf Binding 0 + OpDecorate %cb DescriptorSet 0 + OpDecorate %cb Binding 1 + OpDecorate %_ptr_StorageBuffer_int ArrayStride 4 + OpDecorate %_arr_int_uint_128 ArrayStride 4 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + %void = OpTypeVoid + %15 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_128 = OpConstant %uint 128 +%_arr_int_uint_128 = OpTypeArray %int %uint_128 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %foo = OpTypeStruct %_arr_int_uint_128 %uint %v2float +%_ptr_StorageBuffer_foo = OpTypePointer StorageBuffer %foo + %buf = OpVariable %_ptr_StorageBuffer_foo StorageBuffer + %bar = OpTypeStruct %int +%_ptr_Uniform_bar = OpTypePointer Uniform %bar + %cb = OpVariable %_ptr_Uniform_bar Uniform + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_ptr_Uniform_int = OpTypePointer Uniform %int + %28 = OpTypeFunction %_ptr_StorageBuffer_foo %_ptr_StorageBuffer_foo + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %31 = OpConstantNull %_ptr_StorageBuffer_foo + %32 = OpTypeFunction %_ptr_Input_v3uint +%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int +%_ptr_Function__ptr_StorageBuffer_foo = OpTypePointer Function %_ptr_StorageBuffer_foo +%select_buffer = OpFunction %_ptr_StorageBuffer_foo None %28 + %a = OpFunctionParameter %_ptr_StorageBuffer_foo + %33 = OpLabel + %34 = OpAccessChain %_ptr_Uniform_int %cb %uint_0 + %35 = OpLoad %int %34 + %36 = OpINotEqual %bool %35 %int_0 + %37 = OpSelect %_ptr_StorageBuffer_foo %36 %a %31 + OpReturnValue %37 + OpFunctionEnd +%select_input = OpFunction %_ptr_Input_v3uint None %32 + %38 = OpLabel + %39 = OpAccessChain %_ptr_Uniform_int %cb %uint_0 + %40 = OpLoad %int %39 + %41 = OpINotEqual %bool %40 %int_0 + %42 = OpSelect %_ptr_Input_v3uint %41 %gl_GlobalInvocationID %gl_LocalInvocationID + OpReturnValue %42 + OpFunctionEnd + %main = OpFunction %void None %15 + %43 = OpLabel + %65 = OpVariable %_ptr_Function__ptr_StorageBuffer_foo Function + %44 = OpFunctionCall %_ptr_StorageBuffer_foo %select_buffer %buf + OpStore %65 %44 + %45 = OpFunctionCall %_ptr_Input_v3uint %select_input + %66 = OpLoad 
%_ptr_StorageBuffer_foo %65 + %46 = OpAccessChain %_ptr_StorageBuffer_int %66 %uint_0 %uint_0 + %47 = OpAccessChain %_ptr_StorageBuffer_int %buf %uint_0 %uint_0 + OpBranch %48 + %48 = OpLabel + %49 = OpPhi %_ptr_StorageBuffer_int %46 %43 %50 %51 + %52 = OpPhi %_ptr_StorageBuffer_int %47 %43 %53 %51 + %54 = OpLoad %int %49 + %55 = OpLoad %int %52 + %56 = OpINotEqual %bool %54 %55 + OpLoopMerge %58 %51 None + OpBranchConditional %56 %57 %58 + %57 = OpLabel + %59 = OpIAdd %int %54 %55 + %60 = OpLoad %v3uint %45 + %61 = OpCompositeExtract %uint %60 0 + %62 = OpBitcast %int %61 + %63 = OpIAdd %int %59 %62 + OpStore %49 %63 + OpStore %52 %63 + OpBranch %51 + %51 = OpLabel + %50 = OpPtrAccessChain %_ptr_StorageBuffer_int %49 %uint_1 + %53 = OpPtrAccessChain %_ptr_StorageBuffer_int %52 %uint_1 + OpBranch %48 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp b/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp new file mode 100644 index 000000000..3dcb04f02 --- /dev/null +++ b/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -0,0 +1,75 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "a" + OpName %bar "bar" + OpMemberName %bar 0 "b" + OpName %x "x" + OpName %y "y" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %bar 0 Offset 0 + OpDecorate %foo Block + OpDecorate %bar Block + OpDecorate %x DescriptorSet 0 + OpDecorate %x Binding 0 + OpDecorate %y DescriptorSet 0 + OpDecorate %y Binding 1 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid 
+ %11 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %foo = OpTypeStruct %int +%_ptr_StorageBuffer_foo = OpTypePointer StorageBuffer %foo + %x = OpVariable %_ptr_StorageBuffer_foo StorageBuffer + %bar = OpTypeStruct %int +%_ptr_StorageBuffer_bar = OpTypePointer StorageBuffer %bar + %y = OpVariable %_ptr_StorageBuffer_bar StorageBuffer + %uint_0 = OpConstant %uint 0 + %int_0 = OpConstant %int 0 + %bool = OpTypeBool +%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int + %22 = OpTypeFunction %_ptr_StorageBuffer_int %_ptr_StorageBuffer_foo %_ptr_StorageBuffer_bar +%_ptr_Function__ptr_StorageBuffer_int = OpTypePointer Function %_ptr_StorageBuffer_int + %24 = OpFunction %_ptr_StorageBuffer_int None %22 + %a = OpFunctionParameter %_ptr_StorageBuffer_foo + %b = OpFunctionParameter %_ptr_StorageBuffer_bar + %25 = OpLabel + %26 = OpLoad %v3uint %gl_GlobalInvocationID + %27 = OpCompositeExtract %uint %26 0 + %28 = OpINotEqual %bool %27 %uint_0 + %29 = OpAccessChain %_ptr_StorageBuffer_int %a %uint_0 + %30 = OpAccessChain %_ptr_StorageBuffer_int %b %uint_0 + %31 = OpSelect %_ptr_StorageBuffer_int %28 %29 %30 + OpReturnValue %31 + OpFunctionEnd + %main = OpFunction %void None %11 + %32 = OpLabel + %33 = OpVariable %_ptr_Function__ptr_StorageBuffer_int Function + %34 = OpFunctionCall %_ptr_StorageBuffer_int %24 %x %y + OpStore %33 %34 + %35 = OpLoad %_ptr_StorageBuffer_int %33 + %36 = OpAccessChain %_ptr_StorageBuffer_int %x %uint_0 + %37 = OpLoad %int %36 + OpStore %35 %int_0 + %38 = OpIAdd %int %37 %37 + %39 = OpAccessChain %_ptr_StorageBuffer_int %y %uint_0 + OpStore %39 %38 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp b/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp new file mode 100644 index 000000000..c01432b5d --- /dev/null 
+++ b/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -0,0 +1,147 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 90 +; Schema: 0 + OpCapability Shader + OpCapability ImageQuery + OpCapability StorageImageWriteWithoutFormat + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID + OpExecutionMode %main LocalSize 16 16 1 + OpSource GLSL 450 + OpName %main "main" + OpName %get_texcoord_vi2_vi2_ "get_texcoord(vi2;vi2;" + OpName %base "base" + OpName %index "index" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %r0 "r0" + OpName %u0 "u0" + OpName %i "i" + OpName %j "j" + OpName %param "param" + OpName %param_0 "param" + OpName %cb1_struct "cb1_struct" + OpMemberName %cb1_struct 0 "_m0" + OpName %cb0_1 "cb0_1" + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %u0 DescriptorSet 0 + OpDecorate %u0 Binding 1 + OpDecorate %u0 NonReadable + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %cb1_struct 0 Offset 0 + OpDecorate %cb1_struct Block + OpDecorate %cb0_1 DescriptorSet 0 + OpDecorate %cb0_1 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %9 = OpTypeFunction %v2int %_ptr_Function_v2int %_ptr_Function_v2int + %v3int = OpTypeVector %int 3 +%_ptr_Input_v3int = OpTypePointer Input %v3int +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3int Input + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %float = OpTypeFloat 32 + %30 = OpTypeImage %float 2D 0 0 0 2 Unknown +%_ptr_UniformConstant_30 = OpTypePointer UniformConstant %30 + %u0 = OpVariable %_ptr_UniformConstant_30 UniformConstant + %uint_4 = OpConstant %uint 4 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %uint_1 = 
OpConstant %uint 1 + %bool = OpTypeBool + %uint_0 = OpConstant %uint 0 + %v4float = OpTypeVector %float 4 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 + %cb1_struct = OpTypeStruct %_arr_v4float_uint_1 +%_ptr_Uniform_cb1_struct = OpTypePointer Uniform %cb1_struct + %cb0_1 = OpVariable %_ptr_Uniform_cb1_struct Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %uint_16 = OpConstant %uint 16 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_16 %uint_16 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %r0 = OpVariable %_ptr_Function_v2int Function + %i = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %param = OpVariable %_ptr_Function_v2int Function + %param_0 = OpVariable %_ptr_Function_v2int Function + %33 = OpLoad %30 %u0 + %34 = OpImageQuerySize %v2int %33 + %36 = OpCompositeConstruct %v2uint %uint_4 %uint_4 + %37 = OpShiftRightArithmetic %v2int %34 %36 + %38 = OpCompositeExtract %int %37 0 + %39 = OpCompositeExtract %int %37 1 + %40 = OpCompositeConstruct %v2int %38 %39 + OpStore %r0 %40 + OpStore %i %int_0 + OpBranch %44 + %44 = OpLabel + OpLoopMerge %46 %47 None + OpBranch %48 + %48 = OpLabel + %49 = OpLoad %int %i + %51 = OpAccessChain %_ptr_Function_int %r0 %uint_1 + %52 = OpLoad %int %51 + %54 = OpSLessThan %bool %49 %52 + OpBranchConditional %54 %45 %46 + %45 = OpLabel + OpStore %j %int_0 + OpBranch %56 + %56 = OpLabel + OpLoopMerge %58 %59 None + OpBranch %60 + %60 = OpLabel + %61 = OpLoad %int %j + %63 = OpAccessChain %_ptr_Function_int %r0 %uint_0 + %64 = OpLoad %int %63 + %65 = OpSLessThan %bool %61 %64 + OpBranchConditional %65 %57 %58 + %57 = OpLabel + %66 = OpLoad %30 %u0 + %67 = OpLoad %int %i + %68 = OpLoad %int %j + %69 = OpCompositeConstruct %v2int %67 %68 + %71 = OpLoad %v2int %r0 + OpStore %param %71 + OpStore %param_0 %69 + %73 = OpFunctionCall %v2int %get_texcoord_vi2_vi2_ %param %param_0 + %80 = 
OpAccessChain %_ptr_Uniform_v4float %cb0_1 %int_0 %int_0 + %81 = OpLoad %v4float %80 + %82 = OpVectorShuffle %v4float %81 %81 0 0 0 0 + OpImageWrite %66 %73 %82 + OpBranch %59 + %59 = OpLabel + %83 = OpLoad %int %j + %85 = OpIAdd %int %83 %int_1 + OpStore %j %85 + OpBranch %56 + %58 = OpLabel + OpBranch %47 + %47 = OpLabel + %86 = OpLoad %int %i + %87 = OpIAdd %int %86 %int_1 + OpStore %i %87 + OpBranch %44 + %46 = OpLabel + OpReturn + OpFunctionEnd +%get_texcoord_vi2_vi2_ = OpFunction %v2int None %9 + %base = OpFunctionParameter %_ptr_Function_v2int + %index = OpFunctionParameter %_ptr_Function_v2int + %13 = OpLabel + %14 = OpLoad %v2int %base + %20 = OpLoad %v3int %gl_LocalInvocationID + %21 = OpVectorShuffle %v2int %20 %20 0 1 + %23 = OpIMul %v2int %14 %21 + %24 = OpLoad %v2int %index + %25 = OpIAdd %v2int %23 %24 + OpReturnValue %25 + OpFunctionEnd diff --git a/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp b/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp new file mode 100644 index 000000000..e79354026 --- /dev/null +++ b/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp @@ -0,0 +1,128 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 6 +; Bound: 78 +; Schema: 0 + OpCapability Shader + OpCapability ImageQuery + OpCapability StorageImageWriteWithoutFormat + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationID + OpExecutionMode %main LocalSize 16 16 1 + OpSource GLSL 450 + OpName %main "main" + OpName %r0 "r0" + OpName %u0 "u0" + OpName %i "i" + OpName %j "j" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %cb1_struct "cb1_struct" + OpMemberName %cb1_struct 0 "_m0" + OpName %cb0_1 "cb0_1" + OpDecorate %u0 DescriptorSet 0 + OpDecorate %u0 Binding 1 + OpDecorate %u0 NonReadable + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %_arr_v4float_uint_1 ArrayStride 16 + OpMemberDecorate %cb1_struct 0 
Offset 0 + OpDecorate %cb1_struct Block + OpDecorate %cb0_1 DescriptorSet 0 + OpDecorate %cb0_1 Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 +%_ptr_Function_v2int = OpTypePointer Function %v2int + %float = OpTypeFloat 32 + %11 = OpTypeImage %float 2D 0 0 0 2 Unknown +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %u0 = OpVariable %_ptr_UniformConstant_11 UniformConstant + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_int = OpTypePointer Function %int + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %bool = OpTypeBool + %uint_0 = OpConstant %uint 0 + %v3int = OpTypeVector %int 3 +%_ptr_Input_v3int = OpTypePointer Input %v3int +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3int Input + %v4float = OpTypeVector %float 4 +%_arr_v4float_uint_1 = OpTypeArray %v4float %uint_1 + %cb1_struct = OpTypeStruct %_arr_v4float_uint_1 +%_ptr_Uniform_cb1_struct = OpTypePointer Uniform %cb1_struct + %cb0_1 = OpVariable %_ptr_Uniform_cb1_struct Uniform +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %int_1 = OpConstant %int 1 + %uint_16 = OpConstant %uint 16 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_16 %uint_16 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %r0 = OpVariable %_ptr_Function_v2int Function + %i = OpVariable %_ptr_Function_int Function + %j = OpVariable %_ptr_Function_int Function + %14 = OpLoad %11 %u0 + %15 = OpImageQuerySize %v2int %14 + %19 = OpCompositeConstruct %v2uint %uint_4 %uint_4 + %20 = OpShiftRightArithmetic %v2int %15 %19 + %21 = OpCompositeExtract %int %20 0 + %22 = OpCompositeExtract %int %20 1 + %23 = OpCompositeConstruct %v2int %21 %22 + OpStore %r0 %23 + OpStore %i %int_0 + OpBranch %27 + %27 = OpLabel + OpLoopMerge %29 %30 None + OpBranch %31 + %31 = OpLabel + %32 = OpLoad %int 
%i + %34 = OpAccessChain %_ptr_Function_int %r0 %uint_1 + %35 = OpLoad %int %34 + %37 = OpSLessThan %bool %32 %35 + OpBranchConditional %37 %28 %29 + %28 = OpLabel + OpStore %j %int_0 + OpBranch %39 + %39 = OpLabel + OpLoopMerge %41 %42 None + OpBranch %43 + %43 = OpLabel + %44 = OpLoad %int %j + %46 = OpAccessChain %_ptr_Function_int %r0 %uint_0 + %47 = OpLoad %int %46 + %48 = OpSLessThan %bool %44 %47 + OpBranchConditional %48 %40 %41 + %40 = OpLabel + %49 = OpLoad %11 %u0 + %50 = OpLoad %v2int %r0 + %54 = OpLoad %v3int %gl_LocalInvocationID + %55 = OpVectorShuffle %v2int %54 %54 0 1 + %57 = OpIMul %v2int %50 %55 + %58 = OpLoad %int %i + %59 = OpLoad %int %j + %60 = OpCompositeConstruct %v2int %58 %59 + %61 = OpIAdd %v2int %57 %60 + %68 = OpAccessChain %_ptr_Uniform_v4float %cb0_1 %int_0 %int_0 + %69 = OpLoad %v4float %68 + %70 = OpVectorShuffle %v4float %69 %69 0 0 0 0 + OpImageWrite %49 %61 %70 + OpBranch %42 + %42 = OpLabel + %71 = OpLoad %int %j + %73 = OpIAdd %int %71 %int_1 + OpStore %j %73 + OpBranch %39 + %41 = OpLabel + OpBranch %30 + %30 = OpLabel + %74 = OpLoad %int %i + %75 = OpIAdd %int %74 %int_1 + OpStore %i %75 + OpBranch %27 + %29 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl/comp/access-private-workgroup-in-function.comp b/shaders-opencl/comp/access-private-workgroup-in-function.comp new file mode 100644 index 000000000..7cb1e6f13 --- /dev/null +++ b/shaders-opencl/comp/access-private-workgroup-in-function.comp @@ -0,0 +1,31 @@ +#version 450 +layout(local_size_x = 1) in; + +int f; +shared int u; + +void set_f() +{ + f = 40; +} + +void set_shared_u() +{ + u = 50; +} + +void main() +{ + set_f(); + set_shared_u(); + if (gl_LocalInvocationIndex == 0u) + { + f = 10; + } + else + { + f = 30; + u = 20; + } +} + diff --git a/shaders-opencl/comp/arguments.comp b/shaders-opencl/comp/arguments.comp new file mode 100644 index 000000000..dd154edd3 --- /dev/null +++ b/shaders-opencl/comp/arguments.comp @@ -0,0 +1,13 @@ + #version 450 + 
layout(local_size_x = 64) in; + layout(set = 0, binding = 0) buffer Buf { uint data[]; }; +layout(push_constant) uniform parameter +{ + uint n; +} p; + + + void main() { + uint i = gl_GlobalInvocationID.x; + data[i] = i + p.n; + } diff --git a/shaders-opencl/comp/atomic.comp b/shaders-opencl/comp/atomic.comp new file mode 100644 index 000000000..e25c4f6d2 --- /dev/null +++ b/shaders-opencl/comp/atomic.comp @@ -0,0 +1,56 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1) in; + +layout(r32ui, binding = 0) uniform highp uimage2D uImage; +layout(r32i, binding = 1) uniform highp iimage2D iImage; +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +shared uint shared_u32; +shared int shared_i32; + +void main() +{ + atomicAdd(ssbo.u32, 1u); + atomicOr(ssbo.u32, 1u); + atomicXor(ssbo.u32, 1u); + atomicAnd(ssbo.u32, 1u); + atomicMin(ssbo.u32, 1u); + atomicMax(ssbo.u32, 1u); + atomicExchange(ssbo.u32, 1u); + atomicCompSwap(ssbo.u32, 10u, 2u); + + atomicAdd(ssbo.i32, 1); + atomicOr(ssbo.i32, 1); + atomicXor(ssbo.i32, 1); + atomicAnd(ssbo.i32, 1); + atomicMin(ssbo.i32, 1); + atomicMax(ssbo.i32, 1); + atomicExchange(ssbo.i32, 1); + atomicCompSwap(ssbo.i32, 10, 2); + + shared_u32 = 10u; + shared_i32 = 10; + atomicAdd(shared_u32, 1u); + atomicOr(shared_u32, 1u); + atomicXor(shared_u32, 1u); + atomicAnd(shared_u32, 1u); + atomicMin(shared_u32, 1u); + atomicMax(shared_u32, 1u); + atomicExchange(shared_u32, 1u); + atomicCompSwap(shared_u32, 10u, 2u); + + atomicAdd(shared_i32, 1); + atomicOr(shared_i32, 1); + atomicXor(shared_i32, 1); + atomicAnd(shared_i32, 1); + atomicMin(shared_i32, 1); + atomicMax(shared_i32, 1); + atomicExchange(shared_i32, 1); + atomicCompSwap(shared_i32, 10, 2); +} + diff --git a/shaders-opencl/comp/barriers.comp b/shaders-opencl/comp/barriers.comp new file mode 100644 index 000000000..7e0ea42d4 --- /dev/null +++ b/shaders-opencl/comp/barriers.comp @@ -0,0 +1,79 @@ +#version 310 es 
+layout(local_size_x = 4) in; + +void barrier_shared() +{ + memoryBarrierShared(); +} + +void full_barrier() +{ + memoryBarrier(); +} + +void image_barrier() +{ + memoryBarrierImage(); +} + +void buffer_barrier() +{ + memoryBarrierBuffer(); +} + +void group_barrier() +{ + groupMemoryBarrier(); +} + +void barrier_shared_exec() +{ + memoryBarrierShared(); + barrier(); +} + +void full_barrier_exec() +{ + memoryBarrier(); + barrier(); +} + +void image_barrier_exec() +{ + memoryBarrierImage(); + barrier(); +} + +void buffer_barrier_exec() +{ + memoryBarrierBuffer(); + barrier(); +} + +void group_barrier_exec() +{ + groupMemoryBarrier(); + barrier(); +} + +void exec_barrier() +{ + barrier(); +} + +void main() +{ + barrier_shared(); + full_barrier(); + image_barrier(); + buffer_barrier(); + group_barrier(); + + barrier_shared_exec(); + full_barrier_exec(); + image_barrier_exec(); + buffer_barrier_exec(); + group_barrier_exec(); + + exec_barrier(); +} diff --git a/shaders-opencl/comp/basic.comp b/shaders-opencl/comp/basic.comp new file mode 100644 index 000000000..f9bf55670 --- /dev/null +++ b/shaders-opencl/comp/basic.comp @@ -0,0 +1,28 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders-opencl/comp/basic.dispatchbase.comp b/shaders-opencl/comp/basic.dispatchbase.comp new file mode 100644 index 000000000..2c873468c --- /dev/null +++ b/shaders-opencl/comp/basic.dispatchbase.comp @@ -0,0 +1,29 @@ +#version 310 es +layout(local_size_x_id = 10) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, 
binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint counter; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + uint workgroup = gl_WorkGroupID.x; + vec4 idata = in_data[ident]; + if (dot(idata, vec4(1.0, 5.0, 6.0, 2.0)) > 8.2) + { + out_data[atomicAdd(counter, 1u)] = idata; + } +} + diff --git a/shaders-opencl/comp/buffer-push-const.comp b/shaders-opencl/comp/buffer-push-const.comp new file mode 100644 index 000000000..d3f102e46 --- /dev/null +++ b/shaders-opencl/comp/buffer-push-const.comp @@ -0,0 +1,9 @@ +#version 450 +layout(local_size_x = 64) in; +layout(set = 0, binding = 0) buffer Buf { uint data[]; }; +layout(push_constant) uniform parameter { uint n; } p; + +void main() { + uint i = gl_GlobalInvocationID.x; + data[i] = i + p.n; +} diff --git a/shaders-opencl/comp/builtins.comp b/shaders-opencl/comp/builtins.comp new file mode 100644 index 000000000..88bb5951e --- /dev/null +++ b/shaders-opencl/comp/builtins.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 8, local_size_y = 4, local_size_z = 2) in; + +void main() +{ + uvec3 local_id = gl_LocalInvocationID; + uvec3 global_id = gl_GlobalInvocationID; + uint local_index = gl_LocalInvocationIndex; + uvec3 work_group_size = gl_WorkGroupSize; + uvec3 num_work_groups = gl_NumWorkGroups; + uvec3 work_group_id = gl_WorkGroupID; +} diff --git a/shaders-opencl/comp/cfg-preserve-parameter.comp b/shaders-opencl/comp/cfg-preserve-parameter.comp new file mode 100644 index 000000000..9ef909200 --- /dev/null +++ b/shaders-opencl/comp/cfg-preserve-parameter.comp @@ -0,0 +1,54 @@ +#version 310 es + +// We write in all paths (and no reads), so should just be out. +void out_test_0(int cond, inout int i) +{ + if (cond == 0) + i = 40; + else + i = 60; +} + +// We write in all paths (and no reads), so should just be out. 
+void out_test_1(int cond, inout int i) +{ + switch (cond) + { + case 40: + i = 40; + break; + + default: + i = 70; + break; + } +} + +// We don't write in all paths, so should be inout. +void inout_test_0(int cond, inout int i) +{ + if (cond == 0) + i = 40; +} + +void inout_test_1(int cond, inout int i) +{ + switch (cond) + { + case 40: + i = 40; + break; + } +} + + +void main() +{ + int cond = 40; + int i = 50; + + out_test_0(cond, i); + out_test_1(cond, i); + inout_test_0(cond, i); + inout_test_1(cond, i); +} diff --git a/shaders-opencl/comp/complex-type-alias.comp b/shaders-opencl/comp/complex-type-alias.comp new file mode 100644 index 000000000..4b9b6eddb --- /dev/null +++ b/shaders-opencl/comp/complex-type-alias.comp @@ -0,0 +1,41 @@ +#version 450 +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +struct Foo0 +{ + float a; +}; + +struct Foo1 +{ + Foo0 a; +}; + +void Zero(out Foo0 v) +{ + v.a = 0.0; +} + +struct Foo2 +{ + Foo1 a; + float weight; +}; + +layout(std430, binding = 0) buffer SSBO +{ + Foo2 outputs[]; +}; + +shared Foo2 coeffs[64]; + +void main() +{ + Foo2 data; + data.weight = 0.0; + Zero(data.a.a); + coeffs[gl_LocalInvocationIndex] = data; + barrier(); + if (gl_LocalInvocationIndex == 0u) + outputs[gl_WorkGroupID.x] = coeffs[0]; +} diff --git a/shaders-opencl/comp/composite-construct.comp b/shaders-opencl/comp/composite-construct.comp new file mode 100644 index 000000000..305477532 --- /dev/null +++ b/shaders-opencl/comp/composite-construct.comp @@ -0,0 +1,31 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO0 +{ + vec4 as[]; +}; + +layout(std430, binding = 1) buffer SSBO1 +{ + vec4 bs[]; +}; + +struct Composite +{ + vec4 a; + vec4 b; +}; + +const vec4 const_values[2] = vec4[](vec4(20.0), vec4(40.0)); + +void main() +{ + vec4 values[2] = vec4[](as[gl_GlobalInvocationID.x], bs[gl_GlobalInvocationID.x]); + vec4 copy_values[2]; + copy_values = const_values; + Composite c = 
Composite(values[0], copy_values[1]); + + as[0] = values[gl_LocalInvocationIndex]; + bs[1] = c.b; +} diff --git a/shaders-opencl/comp/culling.comp b/shaders-opencl/comp/culling.comp new file mode 100644 index 000000000..9f8331b10 --- /dev/null +++ b/shaders-opencl/comp/culling.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 4) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +layout(std430, binding = 2) buffer SSBO3 +{ + uint count; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + if (idata > 12.0) + out_data[atomicAdd(count, 1u)] = idata; +} + diff --git a/shaders-opencl/comp/defer-parens.comp b/shaders-opencl/comp/defer-parens.comp new file mode 100644 index 000000000..4e8ea6b39 --- /dev/null +++ b/shaders-opencl/comp/defer-parens.comp @@ -0,0 +1,30 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + vec4 data; + int index; +}; + +void main() +{ + // Tests defer-parens behavior where a binary expression is OpCompositeExtracted chained together + // with an OpCompositeConstruct optimization. + vec4 d = data; + data = vec4(d.x, d.yz + 10.0, d.w); + + // Verify binary ops. + data = d + d + d; + + // Verify swizzles. 
+ data = (d.yz + 10.0).xxyy; + + // OpCompositeExtract + float t = (d.yz + 10.0).y; + data = vec4(t); + + // OpVectorExtractDynamic + t = (d.zw + 10.0)[index]; + data = vec4(t); +} diff --git a/shaders-opencl/comp/dowhile.comp b/shaders-opencl/comp/dowhile.comp new file mode 100644 index 000000000..709db75a1 --- /dev/null +++ b/shaders-opencl/comp/dowhile.comp @@ -0,0 +1,31 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +int i; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + i = 0; + vec4 idat = in_data[ident]; + do + { + idat = mvp * idat; + i++; + } while(i < 16); + + out_data[ident] = idat; +} + diff --git a/shaders-opencl/comp/expect-assume.comp b/shaders-opencl/comp/expect-assume.comp new file mode 100644 index 000000000..767019e5b --- /dev/null +++ b/shaders-opencl/comp/expect-assume.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_EXT_spirv_intrinsics : require + +layout(local_size_x = 32) in; + +layout(std430, binding = 0) buffer buffer_t { + uint z; +} buf; + +spirv_instruction (extensions = ["SPV_KHR_expect_assume"], capabilities = [5629], id = 5630) +void assume_true(bool condition); + +spirv_instruction (extensions = ["SPV_KHR_expect_assume"], capabilities = [5629], id = 5631) +uint expect(uint value, uint exp_value); + +void main() { + assume_true(gl_WorkGroupID.x < 32); + buf.z = expect(gl_WorkGroupID.z, uint(0)); +} diff --git a/shaders-opencl/comp/force-recompile-hooks.swizzle.comp b/shaders-opencl/comp/force-recompile-hooks.swizzle.comp new file mode 100644 index 000000000..2752d3051 --- /dev/null +++ b/shaders-opencl/comp/force-recompile-hooks.swizzle.comp @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 0) uniform sampler2D foo; +layout(binding = 1, rgba8) uniform image2D bar; + +void main() { + vec4 a = texture(foo, vec2(1, 1)); + imageStore(bar, ivec2(0, 
0), a); +} diff --git a/shaders-opencl/comp/functions.comp b/shaders-opencl/comp/functions.comp new file mode 100644 index 000000000..478c8ebe8 --- /dev/null +++ b/shaders-opencl/comp/functions.comp @@ -0,0 +1,12 @@ +#version 450 +shared int foo[1337]; + +void myfunc() +{ + foo[0]=13; +} + +void main() +{ + myfunc(); +} diff --git a/shaders-opencl/comp/global-invocation-id.comp b/shaders-opencl/comp/global-invocation-id.comp new file mode 100644 index 000000000..404ca36a8 --- /dev/null +++ b/shaders-opencl/comp/global-invocation-id.comp @@ -0,0 +1,9 @@ +#version 450 +layout(set = 0, binding = 0) buffer myBlock { + int a; + float b[1]; +} myStorage; +void main() { + myStorage.a = (myStorage.a + 1) % 256; + myStorage.b[gl_GlobalInvocationID.x] = myStorage.b[gl_GlobalInvocationID.x] + 0.02; +} diff --git a/shaders-opencl/comp/image.comp b/shaders-opencl/comp/image.comp new file mode 100644 index 000000000..e375534a5 --- /dev/null +++ b/shaders-opencl/comp/image.comp @@ -0,0 +1,12 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(rgba8, binding = 0) uniform readonly mediump image2D uImageIn; +layout(rgba8, binding = 1) uniform writeonly mediump image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy) + imageSize(uImageIn)); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/shaders-opencl/comp/insert.comp b/shaders-opencl/comp/insert.comp new file mode 100644 index 000000000..07c1f8d7a --- /dev/null +++ b/shaders-opencl/comp/insert.comp @@ -0,0 +1,18 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer SSBO +{ + vec4 out_data[]; +}; + +void main() +{ + vec4 v; + v.x = 10.0; + v.y = 30.0; + v.z = 70.0; + v.w = 90.0; + out_data[gl_GlobalInvocationID.x] = v; + out_data[gl_GlobalInvocationID.x].y = 20.0; +} diff --git a/shaders-opencl/comp/local-invocation-id.comp b/shaders-opencl/comp/local-invocation-id.comp new file mode 100644 index 
000000000..ff2033f66 --- /dev/null +++ b/shaders-opencl/comp/local-invocation-id.comp @@ -0,0 +1,9 @@ +#version 450 +layout(set = 0, binding = 0) buffer myBlock { + int a; + float b[1]; +} myStorage; +void main() { + myStorage.a = (myStorage.a + 1) % 256; + myStorage.b[gl_LocalInvocationID.x] = myStorage.b[gl_LocalInvocationID.x] + 0.02; +} diff --git a/shaders-opencl/comp/local-invocation-index.comp b/shaders-opencl/comp/local-invocation-index.comp new file mode 100644 index 000000000..b661d9002 --- /dev/null +++ b/shaders-opencl/comp/local-invocation-index.comp @@ -0,0 +1,9 @@ +#version 450 +layout(set = 0, binding = 0) buffer myBlock { + int a; + float b[1]; +} myStorage; +void main() { + myStorage.a = (myStorage.a + 1) % 256; + myStorage.b[gl_LocalInvocationIndex.x] = myStorage.b[gl_LocalInvocationIndex.x] + 0.02; +} diff --git a/shaders-opencl/comp/local-size-duplicate-spec-id.comp b/shaders-opencl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..060858b97 --- /dev/null +++ b/shaders-opencl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,15 @@ +#version 450 + +layout(constant_id=0) const int local_size_x_val = 1; +layout(constant_id=1) const int local_size_y_val = 1; +layout(constant_id=2) const int local_size_z_val = 1; + +layout(local_size_x_id=0, local_size_y_id=1, local_size_z_id=2) in; + +layout(set=0, binding=0) buffer StorageBuffer { + uint values[]; +} ssbo; + +void main() { + ssbo.values[gl_LocalInvocationIndex] = 1u; +} diff --git a/shaders-opencl/comp/mod.comp b/shaders-opencl/comp/mod.comp new file mode 100644 index 000000000..1631456e3 --- /dev/null +++ b/shaders-opencl/comp/mod.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 v = mod(in_data[ident], out_data[ident]); + 
out_data[ident] = v; + + uvec4 vu = floatBitsToUint(in_data[ident]) % floatBitsToUint(out_data[ident]); + out_data[ident] = uintBitsToFloat(vu); + + ivec4 vi = floatBitsToInt(in_data[ident]) % floatBitsToInt(out_data[ident]); + out_data[ident] = intBitsToFloat(vi); +} + diff --git a/shaders-opencl/comp/modf.comp b/shaders-opencl/comp/modf.comp new file mode 100644 index 000000000..edadefcf0 --- /dev/null +++ b/shaders-opencl/comp/modf.comp @@ -0,0 +1,23 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 i; + //vec4 v = frexp(in_data[ident], i); + //out_data[ident] = ldexp(v, i); + vec4 v = modf(in_data[ident], i); + out_data[ident] = v; +} + diff --git a/shaders-opencl/comp/outer-product.comp b/shaders-opencl/comp/outer-product.comp new file mode 100644 index 000000000..9aba2a54b --- /dev/null +++ b/shaders-opencl/comp/outer-product.comp @@ -0,0 +1,37 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0, std430) writeonly buffer SSBO +{ + mat2 m22; + mat2x3 m23; + mat2x4 m24; + mat3x2 m32; + mat3 m33; + mat3x4 m34; + mat4x2 m42; + mat4x3 m43; + mat4 m44; +}; + +layout(set = 0, binding = 1, std430) readonly buffer ReadSSBO +{ + vec2 v2; + vec3 v3; + vec4 v4; +}; + +void main() +{ + m22 = outerProduct(v2, v2); + m23 = outerProduct(v3, v2); + m24 = outerProduct(v4, v2); + + m32 = outerProduct(v2, v3); + m33 = outerProduct(v3, v3); + m34 = outerProduct(v4, v3); + + m42 = outerProduct(v2, v4); + m43 = outerProduct(v3, v4); + m44 = outerProduct(v4, v4); +} diff --git a/shaders-opencl/comp/packing-test-1.comp b/shaders-opencl/comp/packing-test-1.comp new file mode 100644 index 000000000..1a8a39e21 --- /dev/null +++ b/shaders-opencl/comp/packing-test-1.comp @@ -0,0 +1,18 @@ +#version 450 +struct T1 +{ + vec3 a; + float b; +}; + 
+layout(std430, binding = 1) buffer Buffer0 { T1 buf0[]; }; +layout(std430, binding = 2) buffer Buffer1 { float buf1[]; }; + +layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; +void main() +{ + // broken case in Metal! + T1 v = buf0[0]; + float x = v.b; + buf1[gl_GlobalInvocationID.x] = x; +} diff --git a/shaders-opencl/comp/packing-test-2.comp b/shaders-opencl/comp/packing-test-2.comp new file mode 100644 index 000000000..73268beec --- /dev/null +++ b/shaders-opencl/comp/packing-test-2.comp @@ -0,0 +1,16 @@ +#version 450 +struct T1 +{ + vec3 a; + float b; +}; + +layout(std430, binding = 1) buffer Buffer0 { T1 buf0[]; }; +layout(std430, binding = 2) buffer Buffer1 { float buf1[]; }; + +layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; +void main() +{ + float x = buf0[0].b; + buf1[gl_GlobalInvocationID.x] = x; +} diff --git a/shaders-opencl/comp/read-write-only.comp b/shaders-opencl/comp/read-write-only.comp new file mode 100644 index 000000000..b224b6f12 --- /dev/null +++ b/shaders-opencl/comp/read-write-only.comp @@ -0,0 +1,26 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(binding = 0, std430) readonly buffer SSBO0 +{ + vec4 data0; + vec4 data1; +}; + +layout(binding = 1, std430) restrict buffer SSBO1 +{ + vec4 data2; + vec4 data3; +}; + +layout(binding = 2, std430) restrict writeonly buffer SSBO2 +{ + vec4 data4; + vec4 data5; +}; + +void main() +{ + data4 = data0 + data2; + data5 = data1 + data3; +} diff --git a/shaders-opencl/comp/rmw-opt.comp b/shaders-opencl/comp/rmw-opt.comp new file mode 100644 index 000000000..a6e1e7fe7 --- /dev/null +++ b/shaders-opencl/comp/rmw-opt.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + int a; +}; + +void main() +{ + a += 10; + a -= 10; + a *= 10; + a /= 10; + a <<= 2; + a >>= 3; + a &= 40; + a ^= 10; + a %= 40; + a |= 1; + + bool c = false; + bool d = true; + c = c && d; + d = d || c; + a = c && d ? 
1 : 0; +} diff --git a/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp b/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 000000000..635463229 --- /dev/null +++ b/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,20 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std430, set = 0, binding = 0) buffer SSBO +{ + float a; + float b; + float c; + float d; + float e; + float f; +}; + +void main() +{ + c = distance(a, b); + d = length(a); + e = normalize(a); + f = distance(a-1, b-2); +} diff --git a/shaders-opencl/comp/shared-std450.double.comp b/shaders-opencl/comp/shared-std450.double.comp new file mode 100644 index 000000000..07e96e6bc --- /dev/null +++ b/shaders-opencl/comp/shared-std450.double.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 4) in; + +shared double sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + double in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + double out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + double idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders-opencl/comp/shared-struct-bool-cast.comp b/shaders-opencl/comp/shared-struct-bool-cast.comp new file mode 100644 index 000000000..d6479b3e4 --- /dev/null +++ b/shaders-opencl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,35 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump ivec3 a; + highp uvec2 b; + bvec4 c; + mediump uint d; +}; + +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec2 (highp uvec2 a, highp uvec2 b) { return a == b; } +bool 
compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = ivec3(6, 8, 8); + s1.b = uvec2(4u, 4u); + s1.c = bvec4(false, false, false, true); + s1.d = 6u; + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_ivec3(ivec3(6, 8, 8), s1.a); + allOk = allOk && compare_uvec2(uvec2(4u, 4u), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s1.c); + allOk = allOk && compare_uint(6u, s1.d); + if (allOk) + passed++; + +} diff --git a/shaders-opencl/comp/shared-zero-init-simple.comp b/shaders-opencl/comp/shared-zero-init-simple.comp new file mode 100644 index 000000000..fe9bac5ad --- /dev/null +++ b/shaders-opencl/comp/shared-zero-init-simple.comp @@ -0,0 +1,24 @@ +#version 450 +#extension GL_EXT_null_initializer : enable +layout(local_size_x = 4) in; + +shared float sShared = {}; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + out_data[ident] = sShared + idata; +} + diff --git a/shaders-opencl/comp/shared-zero-init.comp b/shaders-opencl/comp/shared-zero-init.comp new file mode 100644 index 000000000..f30522c77 --- /dev/null +++ b/shaders-opencl/comp/shared-zero-init.comp @@ -0,0 +1,28 @@ +#version 450 +#extension GL_EXT_null_initializer : enable +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x] = {}; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] += idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders-opencl/comp/shared.comp 
b/shaders-opencl/comp/shared.comp new file mode 100644 index 000000000..4deff9359 --- /dev/null +++ b/shaders-opencl/comp/shared.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/shaders-opencl/comp/spec-constant-work-group-size.comp b/shaders-opencl/comp/spec-constant-work-group-size.comp new file mode 100644 index 000000000..09b65dc99 --- /dev/null +++ b/shaders-opencl/comp/spec-constant-work-group-size.comp @@ -0,0 +1,17 @@ +#version 450 +layout(local_size_x_id = 10, local_size_y = 20) in; + +layout(constant_id = 0) const int a = 1; +layout(constant_id = 1) const int b = 2; + +layout(set = 1, binding = 0) writeonly buffer SSBO +{ + int v[]; +}; + +void main() +{ + int spec_const_array_size[b]; + spec_const_array_size[a] = a; + v[a + gl_WorkGroupSize.x + gl_WorkGroupSize.y] = b + spec_const_array_size[1 - a]; +} diff --git a/shaders-opencl/comp/struct-layout.comp b/shaders-opencl/comp/struct-layout.comp new file mode 100644 index 000000000..5a2b7802d --- /dev/null +++ b/shaders-opencl/comp/struct-layout.comp @@ -0,0 +1,24 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct Foo +{ + mat4 m; +}; + +layout(std430, binding = 0) readonly buffer SSBO +{ + Foo in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + Foo out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident].m = in_data[ident].m * in_data[ident].m; +} + diff --git a/shaders-opencl/comp/struct-nested.comp b/shaders-opencl/comp/struct-nested.comp new file 
mode 100644 index 000000000..d9645cbc4 --- /dev/null +++ b/shaders-opencl/comp/struct-nested.comp @@ -0,0 +1,20 @@ +#version 450 + +struct s1 +{ + int a; +}; + +struct s2 +{ + s1 b; +}; + +layout(std430, binding = 1) buffer dstbuffer{ s2 test[]; }; +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; +void main() +{ + s2 testVal; + testVal.b.a = 0; + test[0] = testVal; +} \ No newline at end of file diff --git a/shaders-opencl/comp/struct-packing.invalid.comp b/shaders-opencl/comp/struct-packing.invalid.comp new file mode 100644 index 000000000..5baf45cb3 --- /dev/null +++ b/shaders-opencl/comp/struct-packing.invalid.comp @@ -0,0 +1,77 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct S0 +{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct S4 +{ + vec2 c; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + + S4 m3s[8]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_430; + +layout(binding = 0, std140) buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[]; +} ssbo_140; + +void main() +{ + ssbo_430.content = ssbo_140.content; + ssbo_430.content.m1.a = ssbo_430.m6[1][1] * ssbo_430.content.m3.a; // test packed matrix access +} + diff --git a/shaders-opencl/comp/torture-loop.comp b/shaders-opencl/comp/torture-loop.comp new file mode 100644 index 000000000..54a1221a1 --- /dev/null +++ b/shaders-opencl/comp/torture-loop.comp @@ -0,0 +1,40 @@ +#version 310 es +layout(local_size_x = 1) in; 
+ +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + + // Continue with side effects. + while (++k < 10) + { + idat *= 2.0; + k++; + } + + // Again used here ... + for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + do + { + k++; + } while (k > 10); + out_data[ident] = idat; +} + diff --git a/shaders-opencl/comp/type-alias.comp b/shaders-opencl/comp/type-alias.comp new file mode 100644 index 000000000..343d350a2 --- /dev/null +++ b/shaders-opencl/comp/type-alias.comp @@ -0,0 +1,45 @@ +#version 310 es +layout(local_size_x = 1) in; + +struct S0 +{ + vec4 a; +}; + +struct S1 +{ + vec4 a; +}; + +vec4 overload(S0 s0) +{ + return s0.a; +} + +vec4 overload(S1 s1) +{ + return s1.a; +} + +layout(std430, binding = 0) buffer SSBO0 +{ + S0 s0s[]; +}; + +layout(std430, binding = 1) buffer SSBO1 +{ + S1 s1s[]; +}; + +layout(std430, binding = 2) buffer SSBO2 +{ + vec4 outputs[]; +}; + + +void main() +{ + S0 s0 = s0s[gl_GlobalInvocationID.x]; + S1 s1 = s1s[gl_GlobalInvocationID.x]; + outputs[gl_GlobalInvocationID.x] = overload(s0) + overload(s1); +} diff --git a/shaders-opencl/comp/udiv.comp b/shaders-opencl/comp/udiv.comp new file mode 100644 index 000000000..d4e1133bc --- /dev/null +++ b/shaders-opencl/comp/udiv.comp @@ -0,0 +1,17 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + uint inputs[]; +}; + +layout(std430, binding = 1) buffer SSBO2 +{ + uint outputs[]; +}; + +void main() +{ + outputs[gl_GlobalInvocationID.x] = inputs[gl_GlobalInvocationID.x] / 29u; +} diff --git a/shaders-opencl/comp/writable-ssbo.comp b/shaders-opencl/comp/writable-ssbo.comp new file mode 100644 index 000000000..d0cc18deb --- /dev/null +++ b/shaders-opencl/comp/writable-ssbo.comp @@ -0,0 +1,9 
@@ +#version 450 +layout(set = 0, binding = 0) buffer myBlock { + int a; + float b; +} myStorage; +void main() { + myStorage.a = (myStorage.a + 1) % 256; + myStorage.b = myStorage.b + 0.02; +} diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index d6c20247a..4773595db 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -266,7 +266,7 @@ class CompilerGLSL : public Compiler // require_extension("GL_KHR_my_extension"); void require_extension(const std::string &ext); - // Returns the list of required extensions. After compilation this will contains any other + // Returns the list of required extensions. After compilation this will contains any other // extensions that the compiler used automatically, in addition to the user specified ones. const SmallVector &get_required_extensions() const; @@ -403,6 +403,16 @@ class CompilerGLSL : public Compiler // Virtualize methods which need to be overridden by subclass targets like C++ and such. virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); + // Called right after the opening { of a non-entry helper function body. + // Override to emit per-function preamble declarations (e.g. #define aliases). + virtual void emit_function_local_declarations(SPIRFunction &) + { + } + // Called right before the closing } of a non-entry helper function body. + // Override to clean up anything emitted by emit_function_local_declarations. 
+ virtual void emit_function_local_epilogue(SPIRFunction &) + { + } SPIRBlock *current_emitting_block = nullptr; SmallVector current_emitting_switch_stack; @@ -451,9 +461,8 @@ class CompilerGLSL : public Compiler virtual void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0); virtual std::string image_type_glsl(const SPIRType &type, uint32_t id = 0, bool member = false); - std::string constant_expression(const SPIRConstant &c, - bool inside_block_like_struct_scope = false, - bool inside_struct_scope = false); + virtual std::string constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope = false, + bool inside_struct_scope = false); virtual std::string constant_op_expression(const SPIRConstantOp &cop); virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); @@ -520,7 +529,7 @@ class CompilerGLSL : public Compiler } template - inline void statement_inner(T &&t, Ts &&... ts) + inline void statement_inner(T &&t, Ts &&...ts) { buffer << std::forward(t); statement_count++; @@ -528,7 +537,7 @@ class CompilerGLSL : public Compiler } template - inline void statement(Ts &&... ts) + inline void statement(Ts &&...ts) { if (is_forcing_recompilation()) { @@ -553,7 +562,7 @@ class CompilerGLSL : public Compiler } template - inline void statement_no_indent(Ts &&... 
ts) + inline void statement_no_indent(Ts &&...ts) { auto old_indent = indent; indent = 0; @@ -588,15 +597,14 @@ class CompilerGLSL : public Compiler void add_local_variable_name(uint32_t id); void add_resource_name(uint32_t id); void add_member_name(SPIRType &type, uint32_t name); - void add_function_overload(const SPIRFunction &func); + virtual void add_function_overload(const SPIRFunction &func); virtual bool is_non_native_row_major_matrix(uint32_t id); virtual bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index); bool member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const; bool member_is_packed_physical_type(const SPIRType &type, uint32_t index) const; virtual std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, - uint32_t physical_type_id, bool is_packed, - bool relaxed = false); + uint32_t physical_type_id, bool is_packed, bool relaxed = false); std::unordered_set local_variable_names; std::unordered_set resource_names; @@ -672,7 +680,7 @@ class CompilerGLSL : public Compiler bool supports_spec_constant_array_size = true; } backend; - void emit_struct(SPIRType &type); + virtual void emit_struct(SPIRType &type); void emit_resources(); void emit_extension_workarounds(ExecutionModel model); void emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op); @@ -724,7 +732,8 @@ class CompilerGLSL : public Compiler const char *op); void emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); - void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op); + void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, + const char *op); void emit_unary_func_op_cast(uint32_t result_type, 
uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type); @@ -747,7 +756,8 @@ class CompilerGLSL : public Compiler void emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, bool negate, SPIRType::BaseType expected_type); void emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, - SPIRType::BaseType input_type, bool skip_cast_if_equal_type, bool implicit_integer_promotion); + SPIRType::BaseType input_type, bool skip_cast_if_equal_type, + bool implicit_integer_promotion); SPIRType binary_op_bitcast_helper(std::string &cast_op0, std::string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type); @@ -781,8 +791,8 @@ class CompilerGLSL : public Compiler virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); virtual bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); - virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, - StorageClass storage, bool &is_packed); + virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, StorageClass storage, + bool &is_packed); std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta = nullptr, bool ptr_chain = false); @@ -813,14 +823,14 @@ class CompilerGLSL : public Compiler SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); std::string to_non_uniform_aware_expression(uint32_t id); - std::string to_atomic_ptr_expression(uint32_t id); - std::string to_pretty_expression_if_int_constant( - uint32_t id, - const GlslConstantNameMapping *mapping_start, const 
GlslConstantNameMapping *mapping_end, - bool register_expression_read = true); + virtual std::string to_atomic_ptr_expression(uint32_t id); + std::string to_pretty_expression_if_int_constant(uint32_t id, const GlslConstantNameMapping *mapping_start, + const GlslConstantNameMapping *mapping_end, + bool register_expression_read = true); std::string to_expression(uint32_t id, bool register_expression_read = true); std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type); - std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type); + std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, + const SPIRType &type); std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true); std::string to_unpacked_row_major_matrix_expression(uint32_t id); @@ -837,9 +847,10 @@ class CompilerGLSL : public Compiler std::string address_of_expression(const std::string &expr); void strip_enclosed_expression(std::string &expr); std::string to_member_name(const SPIRType &type, uint32_t index); - virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved); + virtual std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, + bool ptr_chain_is_resolved); std::string to_multi_member_reference(const SPIRType &type, const SmallVector &indices); - std::string type_to_glsl_constructor(const SPIRType &type); + virtual std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string to_qualifiers_glsl(uint32_t id); void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var); @@ -853,8 +864,8 @@ class CompilerGLSL : public Compiler std::string layout_for_variable(const 
SPIRVariable &variable); std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; - virtual bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, - StorageClass lhs_storage, StorageClass rhs_storage); + virtual bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass lhs_storage, + StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); virtual std::string to_zero_initialized_expression(uint32_t type_id); @@ -863,8 +874,7 @@ class CompilerGLSL : public Compiler bool buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t *failed_index = nullptr, uint32_t start_offset = 0, uint32_t end_offset = ~(0u)); - std::string buffer_to_packing_standard(const SPIRType &type, - bool support_std430_without_scalar_layout, + std::string buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout, bool support_enhanced_layouts); uint32_t type_to_packed_base_size(const SPIRType &type, BufferPackingStandard packing); diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 68c447aa3..522ba7d92 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -25,7 +25,6 @@ #include "GLSL.std.450.h" #include -#include #include using namespace SPIRV_CROSS_SPV_HEADER_NAMESPACE; @@ -59,6 +58,18 @@ string CompilerOpenCL::compile() ir.fixup_reserved_names(); + // Rename WorkgroupSize spec constants to "spvWorkgroupSize" so that builtin_to_glsl + // and the constant declaration both use the same name (Bug B fix for task #13). 
+ ir.for_each_typed_id( + [&](uint32_t id, SPIRConstant &c) + { + if (c.specialization && has_decoration(c.self, DecorationBuiltIn) && + BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) + { + ir.set_name(id, "spvWorkgroupSize"); + } + }); + options.vulkan_semantics = true; options.es = false; options.version = 450; @@ -82,7 +93,7 @@ string CompilerOpenCL::compile() backend.use_typed_initializer_list = true; backend.native_row_major_matrix = false; backend.unsized_array_supported = false; - backend.can_declare_arrays_inline = false; + backend.can_declare_arrays_inline = true; backend.allow_truncated_access_chain = true; backend.comparison_image_samples_scalar = true; backend.native_pointers = true; @@ -160,9 +171,294 @@ const char *CompilerOpenCL::to_storage_qualifiers_glsl(const SPIRVariable &) return ""; } +void CompilerOpenCL::compute_kernel_resources() +{ + // Collect all SSBOs/BufferBlocks that get flattened to __global T* kernel parameters. + flattened_buffer_vars.clear(); + flattened_var_type_decl.clear(); + + ir.for_each_typed_id( + [&](uint32_t var_id, SPIRVariable &var) + { + auto &type = get_variable_data_type(var); + if (var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + + // Compute the element type string for __global T* (same logic as entry_point_args). + string subtype; + if (type.basetype == SPIRType::Struct && type.member_types.size() == 1) + { + const auto &member0_type = get(type.member_types.front()); + subtype = type_to_glsl(member0_type); + } + else + { + subtype = type_to_glsl(type); + } + + flattened_buffer_vars.insert(var_id); + flattened_var_type_decl[var_id] = join("__global ", is_readonly ? "const " : "", subtype, "* "); + } + }); + + // For each non-entry function, find which flattened buffer vars it accesses (direct + transitive). 
+ func_flattened_args.clear(); + + // First pass: direct accesses. + unordered_map> direct_accesses; + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + + auto &accessed = direct_accesses[func_id]; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + const uint32_t *ops = stream(insn); + for (uint32_t i = 0; i < insn.length; i++) + { + if (flattened_buffer_vars.count(ops[i])) + accessed.insert(ops[i]); + } + } + } + }); + + // Second pass: propagate transitively through function calls. + bool changed = true; + while (changed) + { + changed = false; + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + auto &my_accesses = direct_accesses[func_id]; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + if (static_cast(insn.op) == OpFunctionCall) + { + const uint32_t *ops = stream(insn); + uint32_t callee_id = ops[2]; + if (callee_id != ir.default_entry_point && direct_accesses.count(callee_id)) + { + for (auto var_id : direct_accesses[callee_id]) + { + if (!my_accesses.count(var_id)) + { + my_accesses.insert(var_id); + changed = true; + } + } + } + } + } + } + }); + } + + // Convert to sorted vectors (stable ordering by var ID). + for (auto &kv : direct_accesses) + { + if (!kv.second.empty()) + { + SmallVector sorted; + for (auto var_id : kv.second) + sorted.push_back(var_id); + std::sort(sorted.begin(), sorted.end()); + func_flattened_args[kv.first] = sorted; + } + } + + // Collect workgroup (StorageClassWorkgroup) and private global (StorageClassPrivate) variables + // that are accessed in non-entry helper functions. In OpenCL C 1.2 these cannot be at file + // scope, so they must be declared in the kernel body and threaded as pointer parameters. 
+ workgroup_var_ptr_type.clear(); + workgroup_scalar_vars.clear(); + threaded_input_builtins.clear(); + + unordered_set threadable_vars; + ir.for_each_typed_id( + [&](uint32_t var_id, SPIRVariable &var) + { + if (var.storage == StorageClassWorkgroup || var.storage == StorageClassPrivate) + { + auto &type = get_variable_data_type(var); + bool is_array = !type.array.empty(); + bool is_workgroup = (var.storage == StorageClassWorkgroup); + + // Determine element/base type for the pointer parameter. + string elem_type_str; + if (is_array) + { + // Strip outermost array dimension to get element type. + auto elem_type = type; + elem_type.array.pop_back(); + if (!elem_type.array_size_literal.empty()) + elem_type.array_size_literal.pop_back(); + elem_type_str = type_to_glsl(elem_type); + } + else + { + elem_type_str = type_to_glsl(type); + } + + string addr_space = is_workgroup ? "__local " : ""; + workgroup_var_ptr_type[var_id] = addr_space + elem_type_str + "*"; + if (!is_array) + workgroup_scalar_vars.insert(var_id); + + threadable_vars.insert(var_id); + } + // UBO (Uniform + Block) and PushConstant variables become kernel params. + // Helper functions can't see them, so they must be threaded as value params. + else if (var.storage == StorageClassPushConstant || + (var.storage == StorageClassUniform && !is_hidden_variable(var) && + has_decoration(get_variable_data_type(var).self, DecorationBlock) && + !has_decoration(get_variable_data_type(var).self, DecorationBufferBlock))) + { + auto &type = get_variable_data_type(var); + if (type.basetype == SPIRType::Struct) + { + // Pass by value — no pointer, no #define trick needed. + workgroup_var_ptr_type[var_id] = type_to_glsl(type); + // NOT added to workgroup_scalar_vars (no #define needed — pass by value) + threadable_vars.insert(var_id); + } + } + // Input builtin variables (gl_GlobalInvocationID, etc.) accessed in non-entry functions + // need to be materialized as __private local variables and threaded as pointers. 
+ else if (var.storage == StorageClassInput && has_decoration(var_id, DecorationBuiltIn)) + { + auto &type = get_variable_data_type(var); + workgroup_var_ptr_type[var_id] = join("__private ", type_to_glsl(type), "*"); + workgroup_scalar_vars.insert(var_id); + threadable_vars.insert(var_id); + auto builtin = BuiltIn(get_decoration(var_id, DecorationBuiltIn)); + threaded_input_builtins[static_cast(builtin)] = var_id; + } + }); + + // Direct accesses of threadable vars in non-entry functions. + unordered_map> wg_direct; + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + auto &accessed = wg_direct[func_id]; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + const uint32_t *ops = stream(insn); + for (uint32_t i = 0; i < insn.length; i++) + { + if (threadable_vars.count(ops[i])) + accessed.insert(ops[i]); + } + } + } + }); + + // Transitively propagate. + changed = true; + while (changed) + { + changed = false; + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + auto &my = wg_direct[func_id]; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + if (static_cast(insn.op) == OpFunctionCall) + { + const uint32_t *ops = stream(insn); + uint32_t callee_id = ops[2]; + if (callee_id != ir.default_entry_point && wg_direct.count(callee_id)) + { + for (auto var_id : wg_direct[callee_id]) + { + if (!my.count(var_id)) + { + my.insert(var_id); + changed = true; + } + } + } + } + } + } + }); + } + + // Convert to sorted vectors. 
+ func_workgroup_args.clear(); + for (auto &kv : wg_direct) + { + if (!kv.second.empty()) + { + SmallVector sorted; + for (auto var_id : kv.second) + sorted.push_back(var_id); + std::sort(sorted.begin(), sorted.end()); + func_workgroup_args[kv.first] = sorted; + } + } +} + void CompilerOpenCL::emit_resources() { replace_illegal_names(); + compute_kernel_resources(); + + // Task #14: Polyfills for packHalf2x16 / unpackHalf2x16. + // OpenCL C has vstore_half / vload_half which convert float ↔ float16 in memory. + // These flags are set by emit_glsl_op and trigger a recompile on the next pass. + if (needs_half_pack_polyfill) + { + statement("uint spvPackHalf2x16(float2 v) {"); + statement(" uint r;"); + statement(" vstore_half(v.x, 0, (__private half *)&r);"); + statement(" vstore_half(v.y, 1, (__private half *)&r);"); + statement(" return r;"); + statement("}"); + statement(""); + } + if (needs_half_unpack_polyfill) + { + statement("float2 spvUnpackHalf2x16(uint u) {"); + statement(" const __private uint *p = &u;"); + statement(" return (float2)(vload_half(0, (const __private half *)p),"); + statement(" vload_half(1, (const __private half *)p));"); + statement("}"); + statement(""); + } + + // Default sampler for combined image+sampler usage (OpenCL C requires file-scope const sampler_t). 
+ if (needs_default_sampler) + { + statement("const sampler_t spvDefaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " + "CLK_FILTER_NEAREST;"); + statement(""); + } } void CompilerOpenCL::replace_illegal_names() @@ -247,7 +543,7 @@ void CompilerOpenCL::replace_illegal_names() "quad8", "quad16", "complex", - "imaginary" + "imaginary", "__global", "global", "__local", @@ -329,50 +625,154 @@ void CompilerOpenCL::emit_entry_point_declarations() if (execution.model != ExecutionModelGLCompute) return; - bool need_workgroup_id = active_input_builtins.get(BuiltInWorkgroupId); - bool need_local_id = active_input_builtins.get(BuiltInLocalInvocationId); - bool need_global_id = active_input_builtins.get(BuiltInGlobalInvocationId); - bool need_num_workgroups = active_input_builtins.get(BuiltInNumWorkgroups); + // Bug A fix (task #13): builtins are now inline calls in builtin_to_glsl, so we only need + // spvWorkgroupSize when there is no spec-constant version (which lives at file scope). + // Check whether there is a specialization constant decorated BuiltInWorkgroupSize. 
+ bool has_spec_workgroup_size = false; + ir.for_each_typed_id( + [&](uint32_t, const SPIRConstant &c) + { + if (c.specialization && has_decoration(c.self, DecorationBuiltIn) && + BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) + has_spec_workgroup_size = true; + }); + bool need_workgroup_size = active_input_builtins.get(BuiltInWorkgroupSize); - bool need_local_invocation_index = active_input_builtins.get(BuiltInLocalInvocationIndex); - bool need_global_size = active_input_builtins.get(BuiltInGlobalSize); - - if (need_workgroup_id) - statement("uint3 spvWorkgroupId = (uint3)(get_group_id(0), get_group_id(1), get_group_id(2));"); - if (need_local_id) - statement("uint3 spvLocalInvocationId = (uint3)(get_local_id(0), get_local_id(1), get_local_id(2));"); - if (need_global_id) - statement("uint3 spvGlobalInvocationId = (uint3)(get_global_id(0), get_global_id(1), get_global_id(2));"); - if (need_num_workgroups) - statement("uint3 spvNumWorkgroups = (uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2));"); - if (need_workgroup_size) + if (!need_workgroup_size) + { + ir.for_each_typed_id( + [&](uint32_t, const SPIRConstant &c) + { + if (has_decoration(c.self, DecorationBuiltIn) && + BuiltIn(get_decoration(c.self, DecorationBuiltIn)) == BuiltInWorkgroupSize) + need_workgroup_size = true; + }); + } + + // Only emit the kernel-local spvWorkgroupSize variable when there is no file-scope spec constant. + // When a spec constant exists, it is already emitted as a file-scope `constant uint3 spvWorkgroupSize`. 
+ if (need_workgroup_size && !has_spec_workgroup_size) statement("uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2));"); - if (need_local_invocation_index) - statement("uint spvLocalInvocationIndex = get_local_id(2) * get_local_size(0) * get_local_size(1) + " - "get_local_id(1) * get_local_size(0) + get_local_id(0);"); - if (need_global_size) - statement("uint3 spvGlobalSize = (uint3)(get_global_size(0), get_global_size(1), get_global_size(2));"); + + // Task #6: Emit __local declarations for workgroup (shared) variables. + // In OpenCL C 1.x, __local variables must be declared inside kernel functions. + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + if (var.storage == StorageClassWorkgroup && !is_hidden_variable(var)) + { + auto &type = get_variable_data_type(var); + statement("__local ", variable_decl(type, to_name(var.self), var.self), ";"); + } + }); + + // Emit private global variables as kernel-local variables. + // OpenCL C 1.x doesn't support __private file-scope variables, so we move them inside. + for (auto global : global_variables) + { + auto &var = get(global); + if (var.storage == StorageClassPrivate && !is_hidden_variable(var, true)) + { + add_local_variable_name(var.self); + string initializer; + if (var.initializer) + initializer = join(" = ", to_expression(var.initializer)); + statement(CompilerGLSL::variable_decl(var), initializer, ";"); + } + } + + // Materialize Input builtin variables as local variables. + // In OpenCL C, builtins like get_global_id() are function calls, not variables. + // When code needs variable pointers to these builtins (either threaded to non-entry + // functions or used in pointer select within the entry point), we must create + // actual __private local variables. + // + // Collect all builtins that need materialization: union of threaded and entry-point sets. 
+ unordered_map builtins_to_materialize; + for (auto &kv : threaded_input_builtins) + { + auto var_id = kv.second; + bool actually_threaded = false; + for (auto &fa : func_workgroup_args) + { + for (auto vid : fa.second) + { + if (vid == var_id) + { + actually_threaded = true; + break; + } + } + if (actually_threaded) + break; + } + if (actually_threaded) + builtins_to_materialize[kv.first] = kv.second; + } + for (auto &kv : entry_point_materialized_builtins) + builtins_to_materialize[kv.first] = kv.second; + + // Use a guard flag so builtin_to_glsl returns the function call form (not the variable name). + emitting_builtin_materialization = true; + for (auto &kv : builtins_to_materialize) + { + auto var_id = kv.second; + auto &type = get_variable_data_type(get(var_id)); + auto builtin = BuiltIn(kv.first); + string rhs = builtin_to_glsl(builtin, StorageClassInput); + // Builtins return uint3 but the SPIR-V variable may be declared as int3. + string var_type_str = type_to_glsl(type); + if (type.basetype == SPIRType::Int && type.vecsize == 3) + rhs = join("as_int3(", rhs, ")"); + else if (type.basetype == SPIRType::Int && type.vecsize == 2) + rhs = join("as_int2(", rhs, ")"); + statement(var_type_str, " ", to_name(var_id), " = ", rhs, ";"); + } + emitting_builtin_materialization = false; } string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { (void)storage; + if (!emitting_builtin_materialization) + { + auto key = static_cast(builtin); + // If this builtin is threaded as a pointer param to non-entry functions, + // return the variable name so the #define macro can dereference it. + if (!processing_entry_point) + { + auto it = threaded_input_builtins.find(key); + if (it != threaded_input_builtins.end()) + return to_name(it->second); + } + // If this builtin is materialized as a local variable in the entry point, + // return the variable name so that &var_name gives a valid lvalue pointer. 
+ if (processing_entry_point) + { + auto it = entry_point_materialized_builtins.find(key); + if (it != entry_point_materialized_builtins.end()) + return to_name(it->second); + } + } switch (builtin) { case BuiltInWorkgroupId: - return "spvWorkgroupId"; + return "((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))"; case BuiltInLocalInvocationId: - return "spvLocalInvocationId"; + return "((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))"; case BuiltInGlobalInvocationId: - return "spvGlobalInvocationId"; + return "((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))"; case BuiltInNumWorkgroups: - return "spvNumWorkgroups"; + return "((uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2)))"; case BuiltInWorkgroupSize: + // spvWorkgroupSize is either a kernel-local variable or a file-scope spec constant; + // both are named "spvWorkgroupSize" so returning this name works in both cases. return "spvWorkgroupSize"; case BuiltInLocalInvocationIndex: - return "spvLocalInvocationIndex"; + return "((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) " + "+ get_local_id(0)))"; case BuiltInGlobalSize: - return "spvGlobalSize"; + return "((uint3)(get_global_size(0), get_global_size(1), get_global_size(2)))"; case BuiltInNumSubgroups: case BuiltInSubgroupId: case BuiltInSubgroupSize: @@ -396,7 +796,7 @@ string CompilerOpenCL::get_variable_address_space(const SPIRVariable &argument) return get_type_address_space(type, argument.self, true); } -string CompilerOpenCL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) +string CompilerOpenCL::get_type_address_space(const SPIRType &type, uint32_t id, bool) { // This can be called for variable pointer contexts as well, so be very careful about which method we choose. 
Bitset flags; @@ -429,6 +829,10 @@ string CompilerOpenCL::get_type_address_space(const SPIRType &type, uint32_t id, case StorageClassWorkgroup: addr_space = "__local"; break; + case StorageClassInput: + // Input builtins materialized as __private local variables. + addr_space = "__private"; + break; default: // __private is default and would be redundant break; @@ -532,7 +936,9 @@ string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id, bool memb // Scalars case SPIRType::Boolean: - type_name = "bool"; + // OpenCL C has no bool vector types (bool2/bool4 etc.). Map bool vectors to int. + // Scalar bool is fine, but vector bool must be int (comparisons return intN in OpenCL). + type_name = (type.vecsize > 1) ? "int" : "bool"; break; case SPIRType::Char: @@ -590,13 +996,40 @@ string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id) string CompilerOpenCL::image_type_glsl(const SPIRType &type, uint32_t id, bool member) { - (void)id; (void)member; - if (type.basetype != SPIRType::Image) + if (type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage) return ""; - bool readonly = type.image.sampled != 2; - const char *access = readonly ? "read_only" : "write_only"; + // Determine access qualifier. + // SampledImage or sampled==1 means the image is used with a sampler (read-only). + // sampled==2 means storage image (check decorations for read/write). + const char *access; + if (type.basetype == SPIRType::SampledImage || type.image.sampled == 1) + { + access = "read_only"; + } + else + { + auto *var = (id != 0) ? 
maybe_get(id) : nullptr; + if (var) + { + bool non_readable = has_decoration(id, DecorationNonReadable); + bool non_writable = has_decoration(id, DecorationNonWritable); + if (non_readable) + access = "write_only"; + else if (non_writable) + access = "read_only"; + else if (opencl_options.opencl_version >= 200) + access = "read_write"; + else + access = "write_only"; // OCL 1.2: default to write_only when no decoration + } + else + { + access = "write_only"; + } + } + switch (type.image.dim) { case Dim1D: @@ -614,102 +1047,390 @@ string CompilerOpenCL::image_type_glsl(const SPIRType &type, uint32_t id, bool m } } +// Mirrors CompilerMSL::get_physical_type_id_stride so that OpPtrAccessChain +// (used by VariablePointers) does not throw on the OpenCL backend. +uint32_t CompilerOpenCL::get_physical_type_id_stride(TypeID type_id) const +{ + auto &type = get(type_id); + + // PhysicalStorageBuffer pointers are 64-bit (8 bytes). + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer) + return 8u; + + switch (type.basetype) + { + case SPIRType::Unknown: + case SPIRType::Void: + case SPIRType::AtomicCounter: + case SPIRType::Image: + case SPIRType::SampledImage: + case SPIRType::Sampler: + SPIRV_CROSS_THROW("Querying stride of opaque type."); + + default: + break; + } + + if (type.basetype == SPIRType::Struct) + return (uint32_t)get_declared_struct_size(type); + + // Scalar / vector / matrix: width in bits → bytes, with vec3 padded to vec4. 
+ uint32_t vecsize = type.vecsize; + if (vecsize == 3) + vecsize = 4; + return vecsize * type.columns * (type.width / 8u); +} + std::string CompilerOpenCL::type_to_glsl_constructor(const SPIRType &type) { string ret = CompilerGLSL::type_to_glsl_constructor(type); - printf("type_to_glsl_constructor: %s\n", ret.c_str()); if (!ret.empty()) ret = join("(", ret, ")"); return ret; } -// GCC workaround of lambdas calling protected funcs -std::string CompilerOpenCL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) +// OpenCL C requires cast syntax for replicated vector/matrix constants: (float4)(val) not float4(val). +// constant_expression is not virtual in GLSL, so we override it here to fix replicated composites. +std::string CompilerOpenCL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope, + bool inside_struct_scope) { - return CompilerGLSL::variable_decl(type, name, id); + auto &type = get(c.constant_type); + if (c.replicated && type.op != OpTypeArray) + { + auto sub_expr = to_expression(c.subconstants[0]); + if (type.op == OpTypeMatrix) + { + // OpenCL C has no native matrix type; matrices are represented as their column vector type. + // For a replicated matrix constant, just use the column value directly. + return sub_expr; + } + else + { + // Vector replicate: (float4)(scalar) + return join(type_to_glsl_constructor(type), "(", sub_expr, ")"); + } + } + return CompilerGLSL::constant_expression(c, inside_block_like_struct_scope, inside_struct_scope); } -std::string CompilerOpenCL::entry_point_args(bool append_comma) +// OpenCL C requires cast syntax for vector construction: (float4)(1.0, 2.0, 3.0, 4.0) +// The GLSL base emits: float4(1.0, 2.0, 3.0, 4.0) which is invalid in OpenCL C. 
+std::string CompilerOpenCL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { - // Reset flattening maps for this compilation pass - flattened_buffer_vars.clear(); - push_const_member_map.clear(); + string res = CompilerGLSL::constant_expression_vector(c, vector); - std::string ep_args; + auto type = get(c.constant_type); + type.columns = 1; - struct Resource + if (type.vecsize > 1) { - SPIRVariable *var; - SPIRVariable *discrete_descriptor_alias; - string name; - SPIRType::BaseType basetype; - uint32_t index; - uint32_t plane; - uint32_t secondary_index; - }; + // The base class emits: typename(args). OpenCL needs: (typename)(args). + auto type_name = type_to_glsl(type); + if (res.size() > type_name.size() + 1 && res.substr(0, type_name.size()) == type_name && + res[type_name.size()] == '(') + { + res = "(" + type_name + ")(" + res.substr(type_name.size() + 1); + } + } - SmallVector resources; + return res; +} - ir.for_each_typed_id( - [&](uint32_t var_id, SPIRVariable &var) - { - auto &type = get_variable_data_type(var); - /* - if (var.storage == StorageClassPushConstant) - { - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) - { - if (!ep_args.empty()) - ep_args += ", "; +// Override GLSLstd450 extension op handling for OpenCL-specific fixes. 
+void CompilerOpenCL::emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count) +{ + auto glsl_op = static_cast(op); - auto mbr_name = to_member_name(type, mbr_idx); - const auto &member_type = this->get(type.member_types[mbr_idx]); - ep_args += join(this->type_to_glsl(member_type), " ", mbr_name); - // Record the mapping so emit_instruction can rewrite access chains - push_const_member_map[var_id][mbr_idx] = mbr_name; - } - } - */ - if (var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) - { - Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable); + switch (glsl_op) + { + case GLSLstd450Modf: + { + // OpenCL modf takes a pointer for the integer part: modf(x, &iptr) + register_call_out_argument(args[1]); + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, join("modf(", to_expression(args[0]), ", &", to_expression(args[1]), ")"), + false); + break; + } - auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string - { - if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1) - { - // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V - const auto &member0_type = this->get(parent_type.member_types.front()); - return this->type_to_glsl(member0_type); - } - else - { - // Otherwise, this StructuredBuffer only has a basic subtype, e.g. 
StructuredBuffer - return this->type_to_glsl(parent_type); - } - }; - if (!ep_args.empty()) - ep_args += ", "; + case GLSLstd450ModfStruct: + { + // OpenCL modf: result._m0 = modf(x, &result._m1) + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = modf(", to_expression(args[0]), ", &", + to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } - ep_args += join("__global ", is_readonly ? "const " : "", to_structuredbuffer_subtype_name(type), "* ", - to_name(var_id)); - // Record so emit_instruction can rewrite OpAccessChain against this var - flattened_buffer_vars.insert(var_id); - } - else if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || - var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && - !is_hidden_variable(var)) - { - switch (var.basetype) - { - case SPIRType::Struct: - { - break; - } - case SPIRType::Sampler: + // Task #14: Map GLSL half-precision pack/unpack to OpenCL polyfills. + // On the first pass the polyfill may not exist yet; set the flag and force a recompile + // so that emit_resources() will emit the helper functions before they are called. + case GLSLstd450PackHalf2x16: + if (!needs_half_pack_polyfill) + { + needs_half_pack_polyfill = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvPackHalf2x16"); + break; + case GLSLstd450UnpackHalf2x16: + if (!needs_half_unpack_polyfill) + { + needs_half_unpack_polyfill = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvUnpackHalf2x16"); + break; + + default: + CompilerGLSL::emit_glsl_op(result_type, result_id, op, args, count); + break; + } +} + +// Tasks #8: Map type-punning builtins to OpenCL as_TYPE() intrinsics. +// Also fix integral bitcasts: int4 → uint4 must use as_uint4(), not uint4(). 
+std::string CompilerOpenCL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + // Same basetype: no-op + if (out_type.basetype == in_type.basetype) + return ""; + + // All bitcasts (float↔int, int↔uint, half↔short, etc.) use as_TYPE() in OpenCL C. + // type_to_glsl gives us the full type name including vector size (e.g. "float4", "uint"). + auto out_name = type_to_glsl(out_type); + return "as_" + out_name; +} + +// Task #7: In OpenCL C, atomic functions take a pointer argument. +// Access chain expressions (access_chain = true) may be C lvalues (e.g. ssbo->u32) → need &. +// But single-member flattened SSBOs emit the raw pointer itself (e.g. _48 is __global uint*) +// which doesn't need & even though it has access_chain=true. +std::string CompilerOpenCL::to_atomic_ptr_expression(uint32_t id) +{ + auto *e = maybe_get(id); + if (e && e->access_chain) + { + // For SSBO access chains, we need a pointer. + // subscripted_deref_exprs marks access chains that are C values (e.g. _48[0]). + // For those, we need & to get a pointer (which simplifies to the base pointer _48). + // For non-subscripted access chains (pointer-typed), no & is needed. + if (subscripted_deref_exprs.count(id)) + return "&(" + to_expression(id) + ")"; + return "&" + to_expression(id); + } + + // Variable used directly as atomic operand (e.g. shared_u32, a workgroup variable). + // In C this is an lvalue, so we need & to get a pointer. + auto *var = maybe_get(id); + if (var && (var->storage == StorageClassWorkgroup || var->storage == StorageClassStorageBuffer || + var->storage == StorageClassUniform)) + { + return "&" + to_expression(id); + } + + return to_expression(id); +} + +// Task #3: In OpenCL C, pointer-to-struct member access uses -> instead of . +// ptr_chain_is_resolved == false means this is the first member access from the base. 
+std::string CompilerOpenCL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, + bool ptr_chain_is_resolved) +{ + if (!ptr_chain_is_resolved && !subscripted_deref_exprs.count(base)) + { + const auto &base_type = expression_type(base); + if (is_pointer(base_type)) + { + StorageClass sc = base_type.storage; + + // Function/Private storage: use -> only for actual function pointer parameters + // (out/inout params represented as __private T* in OpenCL C). + // Regular local variables (OpVariable Function) are emitted as value types, use '.'. + if (sc == StorageClassFunction || sc == StorageClassPrivate) + { + auto *var = maybe_get(base); + if (var && var->parameter != nullptr) + return join("->", to_member_name(type, index)); + } + + // StorageBuffer SSBOs / __global pointers: always use ->. + // Loaded values (OpLoad result) would have struct type, not pointer type, + // so is_pointer() above is false — we only reach here with actual pointers. + // Note: StorageClassWorkgroup is excluded because __local variables are emitted + // as value types in OpenCL C, so member access uses '.'. + if (sc == StorageClassStorageBuffer || sc == StorageClassCrossWorkgroup) + { + return join("->", to_member_name(type, index)); + } + // StorageClassUniform (UBO): emitted by value in OpenCL — use '.' + } + } + return join(".", to_member_name(type, index)); +} + +// Task #4: Emit typedef so structs can be referenced without the 'struct' keyword in OpenCL C. +void CompilerOpenCL::emit_struct(SPIRType &type) +{ + // Check whether the base class will actually emit this struct (it returns early for aliases). 
+ bool will_emit = type.type_alias == TypeID(0) || + has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked); + + CompilerGLSL::emit_struct(type); + + if (will_emit) + { + auto name = to_name(type.self); + statement("typedef struct ", name, " ", name, ";"); + statement(""); + } +} + +// GCC workaround of lambdas calling protected funcs +std::string CompilerOpenCL::variable_decl(const SPIRType &type, const std::string &name, uint32_t id) +{ + return CompilerGLSL::variable_decl(type, name, id); +} + +// OpenCL C does not support function overloading. If two functions share a name but differ in +// signature (different type hashes), the GLSL base class would allow both to keep the same name +// (since GLSL allows overloading). Override to always rename when a name is already taken. +void CompilerOpenCL::add_function_overload(const SPIRFunction &func) +{ + // Let the base class do its normal work first. + CompilerGLSL::add_function_overload(func); + + // After base class runs, check if another function already has our (possibly newly assigned) name. + // function_overloads maps name → set of type hashes. If this name maps to more than one hash, + // the base class already handled the conflict. But if this is the SECOND function with the same + // base name but different hash (GLSL would allow this), we still have a name collision in C. + // Re-check: if more than one unique-hash entry shares our name, force a rename on this function. + auto current_name = to_name(func.self); + auto itr = function_overloads.find(current_name); + if (itr != end(function_overloads) && itr->second.size() > 1) + { + // Two (or more) different signatures share this name. Rename this function. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(0); // sentinel + } +} + +// For out/inout function parameters (pointer types in SPIR-V), we emit the function parameter as +// '__private T *param'. 
At call sites we must pass '&arg' (take address) so the pointer is valid. +std::string CompilerOpenCL::to_func_call_arg(const SPIRFunction::Parameter &callee_param, uint32_t id) +{ + // Check if the callee parameter expects a pointer (out/inout). + auto ¶m_type = expression_type(callee_param.id); + if (is_pointer(param_type) && param_type.storage == StorageClassFunction) + { + // Pass address of the argument variable. + return join("&", to_expression(id)); + } + + // Flattened buffer vars are already pointers (__global T*). + // Don't take their address when passing to functions expecting buffer pointers. + if (flattened_buffer_vars.count(id)) + { + auto &arg_type = expression_type(id); + auto &callee_type = expression_type(callee_param.id); + if (is_pointer(arg_type) && is_pointer(callee_type) && + (callee_type.storage == StorageClassStorageBuffer || callee_type.storage == StorageClassUniform)) + { + // The flattened var is __global T* but the callee expects __global struct_type*. + // Cast to the expected type. + auto callee_type_name = type_to_glsl(callee_type); + return join("(", callee_type_name, ")", to_expression(id)); + } + } + + return CompilerGLSL::to_func_call_arg(callee_param, id); +} + +std::string CompilerOpenCL::entry_point_args(bool append_comma) +{ + // Note: flattened_buffer_vars is already populated by compute_kernel_resources() in emit_resources(). + // Only reset push_const_member_map here. + push_const_member_map.clear(); + + std::string ep_args; + + struct Resource + { + SPIRVariable *var; + SPIRVariable *discrete_descriptor_alias; + string name; + SPIRType::BaseType basetype; + uint32_t index; + uint32_t plane; + uint32_t secondary_index; + }; + + SmallVector resources; + + ir.for_each_typed_id( + [&](uint32_t var_id, SPIRVariable &var) + { + auto &type = get_variable_data_type(var); + // Push constants: emit as struct value parameter. 
+ if (var.storage == StorageClassPushConstant) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join(type_to_glsl(type), " ", to_name(var_id)); + } + else if (var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock)) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable); + + auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string + { + if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1) + { + // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V + const auto &member0_type = this->get(parent_type.member_types.front()); + return this->type_to_glsl(member0_type); + } + else + { + // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer + return this->type_to_glsl(parent_type); + } + }; + if (!ep_args.empty()) + ep_args += ", "; + + ep_args += join("__global ", is_readonly ? 
"const " : "", to_structuredbuffer_subtype_name(type), "* ", + to_name(var_id)); + // Record so emit_instruction can rewrite OpAccessChain against this var + flattened_buffer_vars.insert(var_id); + } + else if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || + var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && + !is_hidden_variable(var)) + { + switch (type.basetype) + { + case SPIRType::Struct: + { + // UBO (Uniform + Block): emit as value parameter + if (var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock)) + { + if (!ep_args.empty()) + ep_args += ", "; + ep_args += join(type_to_glsl(type), " ", to_name(var_id)); + } + break; + } + case SPIRType::Sampler: + if (!ep_args.empty()) + ep_args += ", "; + ep_args += "sampler_t " + to_name(var_id); break; case SPIRType::Image: + case SPIRType::SampledImage: { if (!ep_args.empty()) ep_args += ", "; @@ -748,17 +1469,24 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r if (func.self != ir.default_entry_point) add_function_overload(func); - // Entry point: __kernel void name(...) 
- emit_workgroup_size_attribute(); + bool is_entry_point = (func.self == ir.default_entry_point); + string decl; - decl += "__kernel void "; - if (func.self == ir.default_entry_point) + if (is_entry_point) { + // Emit work group size attribute and __kernel qualifier for entry point + emit_workgroup_size_attribute(); + decl += "__kernel void "; decl += get_inner_entry_point_name(); processing_entry_point = true; } else + { + // Regular helper function + auto &type = get(func.return_type); + decl += type_to_glsl(type) + " "; decl += to_name(func.self); + } decl += "("; if (processing_entry_point) @@ -773,7 +1501,11 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r { add_local_variable_name(arg.id); - decl += argument_decl(arg); + // OpenCL C has no in/out/inout qualifiers — skip direction prefix from argument_decl. + auto &arg_type = expression_type(arg.id); + decl += to_qualifiers_glsl(arg.id); + decl += variable_decl(arg_type, to_name(arg.id), arg.id); + if (&arg != &func.arguments.back()) decl += ", "; @@ -783,15 +1515,119 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r var->parameter = &arg; } + // For non-entry helper functions: append extra __global T* params for any flattened buffer + // vars that this function (directly or transitively) accesses. This "threads" kernel resources + // down through the call chain since OpenCL C has no global address space for buffer pointers. + if (!is_entry_point) + { + bool first_resource = func.arguments.empty(); + + auto it = func_flattened_args.find(func.self); + if (it != func_flattened_args.end()) + { + for (auto var_id : it->second) + { + auto type_it = flattened_var_type_decl.find(var_id); + if (type_it != flattened_var_type_decl.end()) + { + if (!first_resource) + decl += ", "; + first_resource = false; + decl += type_it->second + to_name(var_id); + } + } + } + + // Also thread workgroup/private global vars as pointer params. 
+ auto wg_it = func_workgroup_args.find(func.self); + if (wg_it != func_workgroup_args.end()) + { + for (auto var_id : wg_it->second) + { + auto type_it = workgroup_var_ptr_type.find(var_id); + if (type_it != workgroup_var_ptr_type.end()) + { + if (!first_resource) + decl += ", "; + first_resource = false; + bool is_scalar = workgroup_scalar_vars.count(var_id) != 0; + string param_name = is_scalar ? (to_name(var_id) + "_ptr") : to_name(var_id); + decl += type_it->second + " " + param_name; + } + } + } + } + decl += ")"; statement(decl); } -void CompilerOpenCL::emit_specialization_constants_and_structs() +void CompilerOpenCL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) +{ + // First, call the base class to handle combined image samplers and other shadow args. + CompilerGLSL::append_global_func_args(func, index, arglist); + + // Then append flattened kernel buffer vars threaded through helper functions. + auto it = func_flattened_args.find(func.self); + if (it != func_flattened_args.end()) + { + for (auto var_id : it->second) + { + if (flattened_var_type_decl.count(var_id)) + arglist.push_back(to_name(var_id)); + } + } + + // Thread workgroup/private global vars. + auto wg_it = func_workgroup_args.find(func.self); + if (wg_it != func_workgroup_args.end()) + { + for (auto var_id : wg_it->second) + { + if (workgroup_var_ptr_type.count(var_id)) + { + bool is_scalar = workgroup_scalar_vars.count(var_id) != 0; + // Arrays decay to pointer; scalars need address-of. + arglist.push_back(is_scalar ? ("&" + to_name(var_id)) : to_name(var_id)); + } + } + } +} + +void CompilerOpenCL::emit_function_local_declarations(SPIRFunction &func) +{ + // For helper functions that access workgroup/private global scalar variables via pointer params: + // emit #define var_name (*var_name_ptr) so that existing expressions (e.g. "u = 50;") + // transparently dereference the pointer parameter. 
+ auto wg_it = func_workgroup_args.find(func.self); + if (wg_it != func_workgroup_args.end()) + { + for (auto var_id : wg_it->second) + { + if (workgroup_scalar_vars.count(var_id)) + { + auto var_name = to_name(var_id); + statement("#define ", var_name, " (*", var_name, "_ptr)"); + } + } + } +} + +void CompilerOpenCL::emit_function_local_epilogue(SPIRFunction &func) { - SpecializationConstant wg_x, wg_y, wg_z; - ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + auto wg_it = func_workgroup_args.find(func.self); + if (wg_it != func_workgroup_args.end()) + { + for (auto var_id : wg_it->second) + { + if (workgroup_scalar_vars.count(var_id)) + statement("#undef ", to_name(var_id)); + } + } +} +void CompilerOpenCL::emit_specialization_constants_and_structs() +{ bool emitted = false; unordered_set declared_structs; unordered_set aligned_structs; @@ -869,14 +1705,32 @@ void CompilerOpenCL::emit_specialization_constants_and_structs() } emitted = true; } + else + { + // Non-specialization constant arrays need to be declared at file scope + // because OpenCL C does not support arrays as value types (can't inline them). + auto &type = get(c.constant_type); + if (is_array(type)) + { + add_resource_name(c.self); + auto name = to_name(c.self); + statement("constant ", variable_decl(type, name, c.self), " = ", constant_expression(c), ";"); + emitted = true; + } + } } else if (id.get_type() == TypeConstantOp) { + // OpSpecConstantOp results are derived from spec constants via arithmetic ops. + // In OpenCL C, "constant T name = expr;" requires a compile-time constant initializer, + // but expressions like "as_uint(spec_const)" (function calls) and "vec.x" (component + // access on a constant variable) are NOT constant expressions in OpenCL C. + // Emit as a #define macro so the expression is inlined at each use site (evaluated at + // runtime when used in a function body, which is the only valid use location). 
auto &c = id.get(); - auto &type = get(c.basetype); add_resource_name(c.self); auto name = to_name(c.self); - statement("constant ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + statement("#define ", name, " (", constant_op_expression(c), ")"); emitted = true; } else if (id.get_type() == TypeType) @@ -913,6 +1767,19 @@ void CompilerOpenCL::emit_specialization_constants_and_structs() // OpUndef can be void for some reason ... if (type.basetype == SPIRType::Void) return; + // Emit a zero-initialized constant so composite uses of this undef can compile. + // OpUndef values are semantically undefined; zero is a safe placeholder. + add_resource_name(undef.self); + auto name = to_name(undef.self); + string zero_expr; + if (type.basetype == SPIRType::Struct) + zero_expr = join("(", type_to_glsl(type), "){ 0 }"); + else if (type.vecsize > 1) + zero_expr = join(type_to_glsl_constructor(type), "(0)"); + else + zero_expr = "0"; + statement("constant ", type_to_glsl(type), " ", name, " = ", zero_expr, ";"); + emitted = true; } } @@ -920,11 +1787,65 @@ void CompilerOpenCL::emit_specialization_constants_and_structs() statement(""); } +bool CompilerOpenCL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass) +{ + // OpenCL C does not support array assignment (array_is_value_type = false). + // Emit element-by-element copy using a for loop. 
+ string lhs; + if (expr) + lhs = expr; + else + lhs = to_expression(lhs_id); + + auto rhs_expr = to_expression(rhs_id); + auto &type = expression_type(rhs_id); + + // Get the array size + if (!is_array(type) || type.array.empty()) + { + // Not actually an array; fall back to simple assignment + statement(lhs, " = ", rhs_expr, ";"); + return true; + } + + uint32_t array_size = type.array.back(); + if (!type.array_size_literal.back()) + { + // Spec constant sized array — use simple assignment and hope for the best + statement(lhs, " = ", rhs_expr, ";"); + return true; + } + + // Emit element-by-element copy + for (uint32_t i = 0; i < array_size; i++) + statement(lhs, "[", i, "] = ", rhs_expr, "[", i, "];"); + + return true; +} + void CompilerOpenCL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); auto opcode = static_cast(instruction.op); + // Task #5: Handle barrier/fence ops with OpenCL C equivalents. + // Returns the CLK_*_MEM_FENCE flags string for the given memory semantics. + auto opencl_mem_fence_flags = [](uint32_t semantics) -> string + { + // We only care about workgroup and uniform/image memory. + bool local = (semantics & MemorySemanticsWorkgroupMemoryMask) != 0; + bool global = (semantics & (MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask)) != 0; + if (local && global) + return "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE"; + else if (local) + return "CLK_LOCAL_MEM_FENCE"; + else if (global) + return "CLK_GLOBAL_MEM_FENCE"; + else + return "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE"; // default: fence everything + }; + // Map buffer atomics to OpenCL C names (atomic_add, atomic_sub, etc.) 
auto opencl_atomic = [this, ops](const char *opencl_op) { @@ -933,8 +1854,241 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], opencl_op); }; + // Helper: cast integer operand to the target signedness using as_TYPE() for OpenCL C. + // OpenCL C forbids implicit conversion between integer vector types of different signedness. + auto cast_int_for_icmp = [this](uint32_t id, bool want_signed) -> string + { + auto &t = expression_type(id); + if (type_is_integral(t)) + { + bool is_signed = t.basetype == SPIRType::SByte || t.basetype == SPIRType::Short || + t.basetype == SPIRType::Int || t.basetype == SPIRType::Int64; + if (is_signed != want_signed) + { + auto target_type = t; + target_type.basetype = want_signed ? to_signed_basetype(t.width) : to_unsigned_basetype(t.width); + return join("as_", type_to_glsl(target_type), "(", to_expression(id), ")"); + } + } + return to_enclosed_expression(id); + }; + + // Helper: returns true if 'id' is a function parameter that carries a pointer type. + // In GLSL, out/inout params are emitted as 'out T', but in OpenCL C they are '__private T *'. + // Loads and stores through such params need explicit pointer dereference. + auto is_func_ptr_param = [&](uint32_t id) -> bool + { + auto *var = maybe_get(id); + return var && var->parameter != nullptr && is_pointer(expression_type(id)) && + expression_type(id).storage == StorageClassFunction; + }; + switch (opcode) { + // OpLoad from an out/inout function parameter pointer: dereference. + case OpLoad: + { + uint32_t ptr = ops[2]; + if (is_func_ptr_param(ptr)) + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + emit_op(result_type, result_id, join("(*", to_name(ptr), ")"), true); + inherit_expression_dependencies(result_id, ptr); + break; + } + // Loading the whole struct from a flattened buffer pointer (or OpCopyObject of one) + // needs dereference. 
Only applies to direct loads from the variable, not access chains. + if (flattened_buffer_vars.count(ptr) || + (maybe_get(ptr) && !get(ptr).access_chain && + maybe_get_backing_variable(ptr) && flattened_buffer_vars.count(maybe_get_backing_variable(ptr)->self))) + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + emit_op(result_type, result_id, join("(*", to_expression(ptr), ")"), true); + inherit_expression_dependencies(result_id, ptr); + break; + } + CompilerGLSL::emit_instruction(instruction); + break; + } + + // OpStore to an out/inout function parameter pointer or flattened buffer: dereference. + case OpStore: + { + uint32_t ptr = ops[0]; + if (is_func_ptr_param(ptr)) + { + statement("*", to_name(ptr), " = ", to_expression(ops[1]), ";"); + register_write(ptr); + break; + } + // Flattened buffer vars are __global T* pointers; storing to them needs dereference. + if (flattened_buffer_vars.count(ptr)) + { + statement("*", to_name(ptr), " = ", to_expression(ops[1]), ";"); + register_write(ptr); + break; + } + CompilerGLSL::emit_instruction(instruction); + break; + } + + // OpenCL C uses fmod() instead of GLSL's mod(). + case OpFMod: + emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod"); + break; + + // SPV_KHR_fma: fused multiply-add — OpenCL C has a native fma() builtin. + case OpFmaKHR: + emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], "fma"); + break; + + // SPV_KHR_expect_assume: no equivalent in OpenCL C. + // OpAssumeTrueKHR: hint that a condition is always true — emit nothing. + case OpAssumeTrueKHR: + break; + // OpExpectKHR: hint that value has an expected value — emit the value unchanged. + case OpExpectKHR: + emit_op(ops[0], ops[1], to_expression(ops[2]), should_forward(ops[2])); + inherit_expression_dependencies(ops[1], ops[2]); + break; + + // Type conversion ops: use OpenCL C convert_TYPE() for numeric value conversions. 
+ // The GLSL base class emits (TYPE)(expr) which in OpenCL C is a bitcast for vector types, + // not a value conversion. convert_TYPE() is correct for both scalar and vector operands. + case OpConvertUToF: + case OpConvertSToF: + case OpConvertFToU: + case OpConvertFToS: + case OpFConvert: + case OpUConvert: + case OpSConvert: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + auto convert_func = join("convert_", type_to_glsl(get(result_type))); + emit_unary_func_op(result_type, result_id, ops[2], convert_func.c_str()); + break; + } + + // OpOuterProduct: no OpenCL builtin and no native matrix type. + // The result matrix type is represented as its column vector type in OpenCL C. + // Emit only the first column (col_vec * row_vec.x). + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t col_vec = ops[2]; // column vector + uint32_t row_vec = ops[3]; // row vector + auto &row_type = expression_type(row_vec); + + // First column of the outer product: col_vec * row_vec.x + string first_row_elem = + row_type.vecsize > 1 ? join(to_expression(row_vec), ".", index_to_swizzle(0)) : to_expression(row_vec); + string expr = join(to_expression(col_vec), " * ", first_row_elem); + emit_op(result_type, result_id, expr, should_forward(col_vec) && should_forward(row_vec)); + inherit_expression_dependencies(result_id, col_vec); + inherit_expression_dependencies(result_id, row_vec); + break; + } + + // Task #9: Map GLSL vector comparison functions to OpenCL C operators. + // GLSL: lessThan(a, b) → OpenCL: (a < b) + // For integer ops, add explicit as_TYPE() casts so operands match the comparison signedness. + // OpenCL C does not allow implicit conversion between signed and unsigned vector types. 
+#define SPIRV_OPENCL_ICMP_OP(signed_op, unsigned_op, float_op_1, float_op_2, op_str) \ + case signed_op: \ + { \ + auto left = cast_int_for_icmp(ops[2], true); \ + auto right = cast_int_for_icmp(ops[3], true); \ + bool fwd = should_forward(ops[2]) && should_forward(ops[3]); \ + emit_op(ops[0], ops[1], join(left, " " op_str " ", right), fwd); \ + inherit_expression_dependencies(ops[1], ops[2]); \ + inherit_expression_dependencies(ops[1], ops[3]); \ + break; \ + } \ + case unsigned_op: \ + { \ + auto left = cast_int_for_icmp(ops[2], false); \ + auto right = cast_int_for_icmp(ops[3], false); \ + bool fwd = should_forward(ops[2]) && should_forward(ops[3]); \ + emit_op(ops[0], ops[1], join(left, " " op_str " ", right), fwd); \ + inherit_expression_dependencies(ops[1], ops[2]); \ + inherit_expression_dependencies(ops[1], ops[3]); \ + break; \ + } \ + case float_op_1: \ + case float_op_2: \ + emit_binary_op(ops[0], ops[1], ops[2], ops[3], op_str); \ + break; + + SPIRV_OPENCL_ICMP_OP(OpSLessThan, OpULessThan, OpFOrdLessThan, OpFUnordLessThan, "<") + SPIRV_OPENCL_ICMP_OP(OpSLessThanEqual, OpULessThanEqual, OpFOrdLessThanEqual, OpFUnordLessThanEqual, "<=") + SPIRV_OPENCL_ICMP_OP(OpSGreaterThan, OpUGreaterThan, OpFOrdGreaterThan, OpFUnordGreaterThan, ">") + SPIRV_OPENCL_ICMP_OP(OpSGreaterThanEqual, OpUGreaterThanEqual, OpFOrdGreaterThanEqual, OpFUnordGreaterThanEqual, + ">=") +#undef SPIRV_OPENCL_ICMP_OP + + case OpIEqual: + case OpFOrdEqual: + case OpFUnordEqual: + case OpLogicalEqual: + emit_binary_op(ops[0], ops[1], ops[2], ops[3], "=="); + break; + case OpINotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: + case OpLogicalNotEqual: + emit_binary_op(ops[0], ops[1], ops[2], ops[3], "!="); + break; + + case OpControlBarrier: + { + // ops[0]=execution_scope, ops[1]=memory_scope, ops[2]=semantics + uint32_t semantics = evaluate_constant_u32(ops[2]); + semantics = mask_relevant_memory_semantics(semantics); + + 
flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + + // Emit memory fence before the execution barrier if needed + string fence_flags = opencl_mem_fence_flags(semantics); + if (semantics != 0) + { + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(", fence_flags, ");"); + else + statement("barrier(", fence_flags, ");"); + } + else + { + // Execution barrier with default local fence + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(CLK_LOCAL_MEM_FENCE);"); + else + statement("barrier(CLK_LOCAL_MEM_FENCE);"); + } + break; + } + + case OpMemoryBarrier: + { + // ops[0]=memory_scope, ops[1]=semantics + uint32_t semantics = evaluate_constant_u32(ops[1]); + semantics = mask_relevant_memory_semantics(semantics); + + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + + if (semantics != 0) + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("mem_fence(", fence_flags, ");"); + } + break; + } + case OpAtomicExchange: if (check_atomic_image(ops[2])) SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); @@ -943,8 +2097,13 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicCompareExchange: if (check_atomic_image(ops[2])) SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); - // OpenCL atomic_cmpxchg(ptr, expected, desired) - emit_atomic_func_op(ops[0], ops[1], ops[2], ops[7], ops[6], "atomic_cmpxchg"); + // OpenCL atomic_cmpxchg(&ptr, expected, desired) + forced_temporaries.insert(ops[1]); + emit_op(ops[0], ops[1], + join("atomic_cmpxchg(", to_atomic_ptr_expression(ops[2]), ", ", to_unpacked_expression(ops[7]), ", ", + to_unpacked_expression(ops[6]), ")"), + false); + flush_all_atomic_capable_variables(); break; case OpAtomicIAdd: case OpAtomicFAddEXT: @@ -1007,7 +2166,7 @@ void CompilerOpenCL::emit_instruction(const Instruction 
&instruction) bool unsigned_type = (type.basetype == SPIRType::UInt); const char *inc = (opcode == OpAtomicIIncrement && unsigned_type) ? "1u" : (opcode == OpAtomicIIncrement) ? "1" : - unsigned_type ? "uint(-1)" : + unsigned_type ? "(uint)(-1)" : "-1"; emit_op(ops[0], ops[1], join("atomic_add(", to_atomic_ptr_expression(ops[2]), ", ", inc, ")"), false); flush_all_atomic_capable_variables(); @@ -1021,23 +2180,91 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) uint32_t base_id = ops[2]; uint32_t length = instruction.length; - // SSBO flattened to __global T*: rewrite [base, member_0, element_idx] → base[element_idx] - if (flattened_buffer_vars.count(base_id) && length >= 5) + if (flattened_buffer_vars.count(base_id)) { - // ops[3] = struct member index (always 0 for single-member SSBO) — skip - // ops[4] = element index within the runtime array - auto expr = join(to_name(base_id), "[", to_expression(ops[4]), "]"); - auto &e = set(result_id, std::move(expr), result_type, true); - auto *backing_var = maybe_get_backing_variable(base_id); - e.loaded_from = backing_var ? backing_var->self : ID(base_id); - e.access_chain = true; - forwarded_temporaries.insert(result_id); - suppressed_usage_tracking.insert(result_id); - for (uint32_t i = 2; i < length; i++) - inherit_expression_dependencies(result_id, ops[i]); - if (get(result_id).expression_dependencies.empty()) - forwarded_temporaries.erase(result_id); - break; + // Handle SSBO access chains for buffer vars. + // Get the original SPIR-V struct type to determine single vs multi-member. + auto *base_var = maybe_get(base_id); + const SPIRType *struct_type = base_var ? 
&get_variable_data_type(*base_var) : nullptr; + bool is_single_member = struct_type && struct_type->member_types.size() == 1; + + string expr; + bool handled = false; + + bool is_subscript_deref = false; // result is a C value (subscripted), not a pointer + + if (length >= 5 && is_single_member) + { + // Single-member SSBO flattened to __global T*: ptr[element_idx][.member]* + // ops[3] = struct member index (always 0, skip) + // ops[4] = element index within the runtime array + // ops[5+] = optional sub-member indices + expr = join(to_name(base_id), "[", to_expression(ops[4]), "]"); + is_subscript_deref = true; + // Walk additional sub-member indices using type info. + if (length >= 6 && struct_type) + { + const SPIRType *cur_type = &get(struct_type->member_types[0]); + for (uint32_t i = 5; i < length; i++) + { + if (cur_type->basetype == SPIRType::Struct) + { + uint32_t mbr_idx = get(ops[i]).scalar(); + expr += join(".", to_member_name(*cur_type, mbr_idx)); + cur_type = &get(cur_type->member_types[mbr_idx]); + } + else + { + // Array or other type - fall back to index notation + expr += join("[", to_expression(ops[i]), "]"); + } + } + } + handled = true; + } + else if (length == 5 && !is_single_member && struct_type) + { + // Multi-member SSBO: ptr->member_name[element_idx] + // ops[3] = member index, ops[4] = array element index + uint32_t mbr_idx = get(ops[3]).scalar(); + auto mbr_name = to_member_name(*struct_type, mbr_idx); + expr = join(to_name(base_id), "->", mbr_name, "[", to_expression(ops[4]), "]"); + is_subscript_deref = true; + handled = true; + } + else if (length == 4 && is_single_member) + { + // Single-member SSBO flattened to T*: accessing the one member gives element 0. 
+ expr = join(to_name(base_id), "[0]"); + is_subscript_deref = true; + handled = true; + } + else if (length == 4 && !is_single_member && struct_type) + { + // Multi-member SSBO: ptr->member_name (lvalue, not address-of) + uint32_t mbr_idx = get(ops[3]).scalar(); + auto mbr_name = to_member_name(*struct_type, mbr_idx); + expr = join(to_name(base_id), "->", mbr_name); + is_subscript_deref = true; // result is a struct value (accessed through ->), use . for children + handled = true; + } + + if (handled) + { + auto &e = set(result_id, std::move(expr), result_type, true); + auto *backing_var = maybe_get_backing_variable(base_id); + e.loaded_from = backing_var ? backing_var->self : ID(base_id); + e.access_chain = true; + if (is_subscript_deref) + subscripted_deref_exprs.insert(result_id); + forwarded_temporaries.insert(result_id); + suppressed_usage_tracking.insert(result_id); + for (uint32_t i = 2; i < length; i++) + inherit_expression_dependencies(result_id, ops[i]); + if (get(result_id).expression_dependencies.empty()) + forwarded_temporaries.erase(result_id); + break; + } } // Push constant expanded to scalar params: rewrite [p_var, member_idx] → scalar param name @@ -1055,8 +2282,377 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) } } + // If the base expression is already a subscripted/dereferenced C value (e.g. ptr[idx]), + // the result of further member access is also a C value. Propagate the tracking so + // to_member_reference continues to use '.' instead of '->'. 
+ bool base_is_deref = subscripted_deref_exprs.count(base_id) != 0; + // Fall through to base class for all other access chains CompilerGLSL::emit_instruction(instruction); + + if (base_is_deref) + subscripted_deref_exprs.insert(result_id); + break; + } + + case OpSelect: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t condition = ops[2]; + uint32_t true_val = ops[3]; + uint32_t false_val = ops[4]; + auto &cond_type = expression_type(condition); + auto &res_type = get(result_type); + + if (res_type.pointer) + { + // If result is a pointer, the pointed-to values may be written through it. + register_write(true_val); + register_write(false_val); + + // Pointer select in OpenCL C: need special handling because + // flattened buffer vars are already pointers (no & needed), + // Input builtins are function calls (can't take &), and + // null pointer constants need to be emitted as NULL. + auto make_ptr_expr = [&](uint32_t val) -> string + { + // Null pointer constant + if (ir.ids[val].get_type() == TypeConstant) + return "NULL"; + // Flattened buffer var — already a pointer value + if (flattened_buffer_vars.count(val)) + return to_enclosed_expression(val); + // Input builtin variable — materialize as local var and take address + auto *var = maybe_get(val); + if (var && var->storage == StorageClassInput && has_decoration(val, DecorationBuiltIn)) + { + if (processing_entry_point) + { + // Entry point: materialize the builtin as a local variable. + auto builtin = BuiltIn(get_decoration(val, DecorationBuiltIn)); + auto key = static_cast(builtin); + if (entry_point_materialized_builtins.emplace(key, val).second) + force_recompile(); + return "&" + to_name(val); + } + else + { + // Non-entry function: builtins are threaded via #define trick, + // so to_name(val) is a valid lvalue via the macro. 
+ return "&" + to_name(val); + } + } + // Default: use base class pointer expression + return to_enclosed_pointer_expression(val); + }; + + auto expr = join(to_enclosed_expression(condition), " ? ", make_ptr_expr(true_val), " : ", + make_ptr_expr(false_val)); + emit_op(result_type, result_id, expr, + should_forward(condition) && should_forward(true_val) && should_forward(false_val)); + inherit_expression_dependencies(result_id, condition); + inherit_expression_dependencies(result_id, true_val); + inherit_expression_dependencies(result_id, false_val); + } + else if (cond_type.vecsize > 1 && cond_type.basetype == SPIRType::Boolean && res_type.vecsize > 1) + { + // In OpenCL C, vector ternary and bool-to-int casts don't work like GLSL. + // Use OpenCL's select(false_val, true_val, cond) instead. + emit_trinary_func_op(result_type, result_id, false_val, true_val, condition, "select"); + } + else + { + CompilerGLSL::emit_instruction(instruction); + } + break; + } + + case OpCompositeConstructReplicateEXT: + { + // GLSL base uses type(value) for vector splat, but OpenCL C needs (type)(value). + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + auto &type = get(result_type); + if (type.op == OpTypeMatrix) + { + // OpenCL C has no native matrix type; matrices are represented as their column vector type. + // Just use the sub-value directly (representing the first/only column). 
+ emit_op(result_type, result_id, to_expression(ops[2]), should_forward(ops[2])); + inherit_expression_dependencies(result_id, ops[2]); + } + else if (type.op != OpTypeArray && type.vecsize > 1) + { + // Vector replicate: (float4)(scalar_value) + auto rhs = join(type_to_glsl_constructor(type), "(", to_expression(ops[2]), ")"); + emit_op(result_type, result_id, rhs, true); + inherit_expression_dependencies(result_id, ops[2]); + } + else + { + // Array replicate: delegate to base + CompilerGLSL::emit_instruction(instruction); + } + break; + } + + // Map GLSL imulExtended/umulExtended to OpenCL C mul_hi + multiply. + case OpUMulExtended: + case OpSMulExtended: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + // _m0 = low bits (a * b), _m1 = high bits (mul_hi(a, b)) + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", to_expression(op0), " * ", + to_expression(op1), ";"); + statement(to_expression(result_id), ".", to_member_name(type, 1), " = mul_hi(", to_expression(op0), ", ", + to_expression(op1), ");"); + break; + } + + case OpQuantizeToF16: + { + // GLSL emits unpackHalf2x16/packHalf2x16 which aren't OpenCL builtins. + // Use our polyfill functions instead. 
+ if (!needs_half_pack_polyfill || !needs_half_unpack_polyfill) + { + needs_half_pack_polyfill = true; + needs_half_unpack_polyfill = true; + force_recompile(); + } + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + string op; + auto &type = get(result_type); + switch (type.vecsize) + { + case 1: + op = join("spvUnpackHalf2x16(spvPackHalf2x16((float2)(", to_expression(arg), ", 0.0f))).x"); + break; + case 2: + op = join("spvUnpackHalf2x16(spvPackHalf2x16(", to_expression(arg), "))"); + break; + case 3: + { + auto op0 = join("spvUnpackHalf2x16(spvPackHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("spvUnpackHalf2x16(spvPackHalf2x16((float2)(", to_expression(arg), ".z, 0.0f))).x"); + op = join("(float3)(", op0, ", ", op1, ")"); + break; + } + case 4: + { + auto op0 = join("spvUnpackHalf2x16(spvPackHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("spvUnpackHalf2x16(spvPackHalf2x16(", to_expression(arg), ".zw))"); + op = join("(float4)(", op0, ", ", op1, ")"); + break; + } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); + break; + } + + // Map OpImageSample* (texture sampling) to OpenCL read_image* with sampler. + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t combined_id = ops[2]; + uint32_t coord_id = ops[3]; + + if (!needs_default_sampler) + { + needs_default_sampler = true; + force_recompile(); + } + + auto &result_spirtype = get(result_type); + const char *read_func; + switch (result_spirtype.basetype) + { + case SPIRType::UInt: + read_func = "read_imageui"; + break; + case SPIRType::Int: + read_func = "read_imagei"; + break; + default: + read_func = "read_imagef"; + break; + } + + // For combined image+sampler, get the underlying image expression. 
+ auto img_expr = to_expression(combined_id); + + // Sampler-based read_image* takes float coordinates. + auto &coord_type = expression_type(coord_id); + string coord_expr; + if (coord_type.basetype == SPIRType::Float) + coord_expr = to_expression(coord_id); + else + coord_expr = join("convert_float", coord_type.vecsize > 1 ? to_string(coord_type.vecsize) : "", "(", + to_expression(coord_id), ")"); + + auto raw_expr = join(read_func, "(", img_expr, ", spvDefaultSampler, ", coord_expr, ")"); + auto swizzled = remap_swizzle(result_spirtype, 4, raw_expr); + + bool forward = should_forward(combined_id) && should_forward(coord_id); + emit_op(result_type, result_id, swizzled, forward); + inherit_expression_dependencies(result_id, combined_id); + inherit_expression_dependencies(result_id, coord_id); + break; + } + + // Task #10: Map image read/write/query ops to OpenCL C equivalents. + case OpImageRead: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t image_id = ops[2]; + uint32_t coord_id = ops[3]; + + auto &img_type = expression_type(image_id); + // SubpassData is not supported; fall through to base class. + if (img_type.image.dim == DimSubpassData) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto &result_spirtype = get(result_type); + const char *read_func; + switch (result_spirtype.basetype) + { + case SPIRType::UInt: + read_func = "read_imageui"; + break; + case SPIRType::Int: + read_func = "read_imagei"; + break; + default: + read_func = "read_imagef"; + break; + } + + // Convert coordinate to int. + auto coord_type = expression_type(coord_id); + coord_type.basetype = SPIRType::Int; + auto coord_expr = bitcast_expression(coord_type, expression_type(coord_id).basetype, to_expression(coord_id)); + + // OpenCL read functions always return a vec4; swizzle down to the required vecsize. 
+ auto raw_expr = join(read_func, "(", to_expression(image_id), ", ", coord_expr, ")"); + // Build a temporary vec4 type for the result of the read function. + SPIRType vec4_type = result_spirtype; + vec4_type.vecsize = 4; + auto swizzled = remap_swizzle(result_spirtype, 4, raw_expr); + + bool forward = should_forward(image_id) && should_forward(coord_id); + emit_op(result_type, result_id, swizzled, forward); + inherit_expression_dependencies(result_id, image_id); + inherit_expression_dependencies(result_id, coord_id); + break; + } + + case OpImageWrite: + { + uint32_t image_id = ops[0]; + uint32_t coord_id = ops[1]; + uint32_t texel_id = ops[2]; + + // Unset NonWritable so the variable can be written (mirroring GLSL backend). + auto *image_var = maybe_get_backing_variable(image_id); + if (image_var) + unset_decoration(image_var->self, DecorationNonWritable); + + auto &value_type = expression_type(texel_id); + const char *write_func; + switch (value_type.basetype) + { + case SPIRType::UInt: + write_func = "write_imageui"; + break; + case SPIRType::Int: + write_func = "write_imagei"; + break; + default: + write_func = "write_imagef"; + break; + } + + // Convert coordinate to int. + auto coord_type = expression_type(coord_id); + coord_type.basetype = SPIRType::Int; + auto coord_expr = bitcast_expression(coord_type, expression_type(coord_id).basetype, to_expression(coord_id)); + + // OpenCL write functions expect a vec4 texel; expand if necessary. + // Use (vec4_type)(expr) C-style cast which is valid for scalar-to-vector broadcast. 
+ SPIRType vec4_type = value_type; + vec4_type.vecsize = 4; + string texel_raw = to_expression(texel_id); + string texel_expr; + if (value_type.vecsize == 4) + texel_expr = texel_raw; + else + texel_expr = join("(", type_to_glsl(vec4_type), ")(", texel_raw, ")"); + + statement(write_func, "(", to_expression(image_id), ", ", coord_expr, ", ", texel_expr, ");"); + + if (image_var && variable_storage_is_aliased(*image_var)) + flush_all_aliased_variables(); + break; + } + + case OpImageQuerySize: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t image_id = ops[2]; + + auto &img_type = expression_type(image_id); + if (img_type.basetype != SPIRType::Image) + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + auto img_expr = to_expression(image_id); + string size_expr; + auto dim = img_type.image.dim; + bool arrayed = img_type.image.arrayed; + + if (dim == Dim1D || dim == DimBuffer) + { + size_expr = join("get_image_width(", img_expr, ")"); + } + else if (dim == Dim2D || dim == DimCube) + { + if (arrayed) + size_expr = join("(int3)(get_image_width(", img_expr, "), get_image_height(", img_expr, + "), get_image_array_size(", img_expr, "))"); + else + size_expr = join("(int2)(get_image_width(", img_expr, "), get_image_height(", img_expr, "))"); + } + else if (dim == Dim3D) + { + size_expr = join("(int3)(get_image_width(", img_expr, "), get_image_height(", img_expr, + "), get_image_depth(", img_expr, "))"); + } + else + { + CompilerGLSL::emit_instruction(instruction); + break; + } + + emit_op(result_type, result_id, size_expr, true); + inherit_expression_dependencies(result_id, image_id); break; } diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index da64673d8..90a4291e0 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -46,10 +46,16 @@ class CompilerOpenCL : public CompilerGLSL bool enable_fp64 = false; // Enable cl_khr_int64_extended_atomics extension bool enable_64bit_atomics = false; + // Enable cl_khr_subgroups extension 
+ bool enable_subgroups = false; + // Enable cl_khr_subgroup_shuffle extension + bool enable_shuffle = false; void set_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) { opencl_version = make_opencl_version(major, minor, patch); + if (opencl_version >= 200 && opencl_version < 300) + enable_subgroups = true; } bool supports_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) const @@ -98,12 +104,30 @@ class CompilerOpenCL : public CompilerGLSL std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_instruction(const Instruction &instruction) override; + std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, + bool ptr_chain_is_resolved) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; + void add_function_overload(const SPIRFunction &func) override; + void emit_struct(SPIRType &type) override; + std::string type_to_glsl_constructor(const SPIRType &type) override; + bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass lhs_storage, + StorageClass rhs_storage) override; + std::string constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope = false, + bool inside_struct_scope = false) override; + std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector) override; + std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + std::string to_atomic_ptr_expression(uint32_t id) override; + void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, + uint32_t count) override; virtual bool builtin_translates_to_nonarray(BuiltIn builtin) const override; std::string get_variable_address_space(const SPIRVariable &argument); std::string get_type_address_space(const SPIRType 
&type, uint32_t id, bool argument = false); const char *to_restrict(uint32_t id, bool space); + uint32_t get_physical_type_id_stride(TypeID type_id) const override; void replace_illegal_names() override; + void emit_function_local_declarations(SPIRFunction &func) override; + void emit_function_local_epilogue(SPIRFunction &func) override; Options opencl_options; @@ -114,6 +138,37 @@ class CompilerOpenCL : public CompilerGLSL std::unordered_set constant_macro_ids; + // Expression IDs that were produced by subscripting a flattened SSBO pointer (e.g. ptr[idx]). + // These are C values (not pointers), so subsequent member accesses must use '.' not '->'. + std::unordered_set subscripted_deref_exprs; + + // Set when packHalf2x16/unpackHalf2x16 polyfill helpers are needed. + bool needs_half_pack_polyfill = false; + bool needs_half_unpack_polyfill = false; + // Set when a default sampler is needed for combined image+sampler usage. + bool needs_default_sampler = false; + + // For each non-entry function, the ordered list of flattened buffer var IDs to thread as extra params. + std::unordered_map> func_flattened_args; + // Map from flattened buffer var ID to its OpenCL type declaration prefix ("__global T*" etc.) + std::unordered_map flattened_var_type_decl; + + // For each non-entry function, workgroup/private global vars accessed and needing pointer threading. 
+ std::unordered_map> func_workgroup_args; + // Map from workgroup/private var ID to its pointer type declaration prefix + std::unordered_map workgroup_var_ptr_type; + // Set of scalar (non-array) workgroup/private vars that need #define dereference inside callees + std::unordered_set workgroup_scalar_vars; + + // Input builtin variables threaded to non-entry functions (BuiltIn enum → variable ID) + std::unordered_map threaded_input_builtins; + // Input builtin variables materialized as local vars in the entry point (BuiltIn enum → variable ID) + std::unordered_map entry_point_materialized_builtins; + // Guard flag to avoid circular reference during builtin materialization emission + bool emitting_builtin_materialization = false; + + void compute_kernel_resources(); + void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) override; void emit_workgroup_size_attribute(); std::string entry_point_args(bool append_comma); diff --git a/test_shaders.py b/test_shaders.py index b3f87fc7f..dbc38ba5c 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -613,6 +613,12 @@ def path_to_opencl_standard_cli(shader): def validate_shader_opencl(shader, opt, paths): shader = reference_path(shader[0], shader[1], opt) extensions = [] + if '.double.' in shader: + extensions.append('cl_khr_fp64') + if '.subgroup.' in shader: + extensions.append('cl_khr_subgroups') + if '.shuffle.' in shader: + extensions.append('cl_khr_subgroup_shuffle') global ignore_clang try: @@ -675,8 +681,17 @@ def cross_compile_opencl(shader, spirv, opt, iterations, paths): opencl_args = [spirv_cross_path, '--output', opencl_path, spirv_path, '--opencl', '--iterations', str(iterations)] opencl_args.append('--opencl-version') opencl_args.append(path_to_opencl_standard_cli(shader)) - - subprocess.check_call(opencl_args) + if '.double.' in shader: + opencl_args.append('--opencl-fp64') + if '.subgroup.' in shader: + opencl_args.append('--opencl-subgroups') + if '.shuffle.' 
in shader: + opencl_args.append('--opencl-shuffle') + + if shader_is_invalid_spirv(shader): + subprocess.run(opencl_args) + else: + subprocess.check_call(opencl_args) if not shader_is_invalid_spirv(opencl_path): subprocess.check_call([paths.spirv_val, '--allow-localsizeid', '--scalar-block-layout', '--target-env', spirv_env, spirv_path]) diff --git a/test_shaders.sh b/test_shaders.sh index 54bf700ca..a054710ed 100755 --- a/test_shaders.sh +++ b/test_shaders.sh @@ -21,6 +21,8 @@ echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." ./test_shaders.py shaders-msl ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-msl ${OPTS} --msl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-msl-no-opt ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-opencl ${OPTS} --opencl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-opencl ${OPTS} --opencl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl ${OPTS} --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl-no-opt ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 From 7308550f9e29aa9fe1a2230de36463f96e8420b3 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Thu, 12 Mar 2026 18:19:39 -0700 Subject: [PATCH 03/16] Adding shaders-opencl opt references --- .../asm/comp/atomic-decrement.asm.comp | 17 ++++ .../asm/comp/atomic-increment.asm.comp | 17 ++++ .../asm/comp/bitcast_iadd.asm.comp | 32 ++++++++ .../asm/comp/bitcast_icmp.asm.comp | 32 ++++++++ .../asm/comp/bitcast_sar.asm.comp | 34 ++++++++ .../asm/comp/bitcast_sdiv.asm.comp | 34 ++++++++ .../asm/comp/bitcast_slr.asm.comp | 34 ++++++++ .../asm/comp/block-name-alias-global.asm.comp | 48 ++++++++++++ .../comp/buffer-write-relative-addr.asm.comp | 21 +++++ .../asm/comp/buffer-write.asm.comp | 16 ++++ 
.../comp/copy-object-ssbo-to-ssbo.asm.comp | 24 ++++++ .../asm/comp/copy-object-ubo-to-ssbo.asm.comp | 24 ++++++ .../asm/comp/duplicate-spec-id.asm.comp | 26 +++++++ .../asm/comp/fma.spv16.asm.comp | 23 ++++++ .../comp/global-parameter-name-alias.asm.comp | 8 ++ ...e-load-store-short-vector.invalid.asm.comp | 18 +++++ ...p-spec-constant-op-vector-related.asm.comp | 77 +++++++++++++++++++ .../shaders-opencl/asm/comp/quantize.asm.comp | 35 +++++++++ .../asm/comp/relaxed-block-layout.asm.comp | 23 ++++++ .../comp/replicated-composites.spv16.asm.comp | 30 ++++++++ ...specialization-constant-workgroup.asm.comp | 26 +++++++ .../struct-resource-name-aliasing.asm.comp | 17 ++++ .../asm/comp/uint_smulextended.asm.comp | 28 +++++++ .../undefined-constant-composite.asm.comp | 33 ++++++++ ...undefined-spec-constant-composite.asm.comp | 38 +++++++++ .../asm/comp/variable-pointers-2.asm.comp | 56 ++++++++++++++ ...ariable-pointers-store-forwarding.asm.comp | 28 +++++++ .../vector-builtin-type-cast-func.asm.comp | 28 +++++++ .../comp/vector-builtin-type-cast.asm.comp | 28 +++++++ .../access-private-workgroup-in-function.comp | 9 +++ .../opt/shaders-opencl/comp/arguments.comp | 24 ++++++ reference/opt/shaders-opencl/comp/atomic.comp | 53 +++++++++++++ .../opt/shaders-opencl/comp/barriers.comp | 25 ++++++ reference/opt/shaders-opencl/comp/basic.comp | 36 +++++++++ .../comp/basic.dispatchbase.comp | 41 ++++++++++ .../comp/buffer-push-const.comp | 24 ++++++ .../opt/shaders-opencl/comp/builtins.comp | 9 +++ .../comp/cfg-preserve-parameter.comp | 8 ++ .../comp/complex-type-alias.comp | 46 +++++++++++ .../comp/composite-construct.comp | 26 +++++++ .../opt/shaders-opencl/comp/culling.comp | 36 +++++++++ .../opt/shaders-opencl/comp/defer-parens.comp | 24 ++++++ .../opt/shaders-opencl/comp/dowhile.comp | 44 +++++++++++ .../shaders-opencl/comp/expect-assume.comp | 17 ++++ .../comp/force-recompile-hooks.swizzle.comp | 11 +++ .../opt/shaders-opencl/comp/functions.comp | 8 ++ 
.../comp/global-invocation-id.comp | 18 +++++ reference/opt/shaders-opencl/comp/image.comp | 11 +++ reference/opt/shaders-opencl/comp/insert.comp | 18 +++++ .../comp/local-invocation-id.comp | 18 +++++ .../comp/local-invocation-index.comp | 18 +++++ .../comp/local-size-duplicate-spec-id.comp | 30 ++++++++ reference/opt/shaders-opencl/comp/mod.comp | 27 +++++++ reference/opt/shaders-opencl/comp/modf.comp | 34 ++++++++ .../shaders-opencl/comp/outer-product.comp | 45 +++++++++++ .../shaders-opencl/comp/packing-test-1.comp | 32 ++++++++ .../shaders-opencl/comp/packing-test-2.comp | 32 ++++++++ .../shaders-opencl/comp/read-write-only.comp | 35 +++++++++ .../opt/shaders-opencl/comp/rmw-opt.comp | 27 +++++++ ...alar-std450-distance-length-normalize.comp | 25 ++++++ .../comp/shared-std450.double.comp | 29 +++++++ .../comp/shared-struct-bool-cast.comp | 65 ++++++++++++++++ .../comp/shared-zero-init-simple.comp | 25 ++++++ .../shaders-opencl/comp/shared-zero-init.comp | 30 ++++++++ reference/opt/shaders-opencl/comp/shared.comp | 28 +++++++ .../comp/spec-constant-work-group-size.comp | 39 ++++++++++ .../shaders-opencl/comp/struct-layout.comp | 31 ++++++++ .../shaders-opencl/comp/struct-nested.comp | 31 ++++++++ .../comp/struct-packing.invalid.comp | 0 .../opt/shaders-opencl/comp/torture-loop.comp | 46 +++++++++++ .../opt/shaders-opencl/comp/type-alias.comp | 45 +++++++++++ reference/opt/shaders-opencl/comp/udiv.comp | 24 ++++++ .../shaders-opencl/comp/writable-ssbo.comp | 18 +++++ 73 files changed, 2077 insertions(+) create mode 100644 reference/opt/shaders-opencl/asm/comp/atomic-decrement.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/atomic-increment.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/bitcast_iadd.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/bitcast_icmp.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/bitcast_sar.asm.comp create mode 100644 
reference/opt/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/bitcast_slr.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/block-name-alias-global.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/buffer-write.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/fma.spv16.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/quantize.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/uint_smulextended.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/variable-pointers-2.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp create mode 100644 
reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp create mode 100644 reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp create mode 100644 reference/opt/shaders-opencl/comp/access-private-workgroup-in-function.comp create mode 100644 reference/opt/shaders-opencl/comp/arguments.comp create mode 100644 reference/opt/shaders-opencl/comp/atomic.comp create mode 100644 reference/opt/shaders-opencl/comp/barriers.comp create mode 100644 reference/opt/shaders-opencl/comp/basic.comp create mode 100644 reference/opt/shaders-opencl/comp/basic.dispatchbase.comp create mode 100644 reference/opt/shaders-opencl/comp/buffer-push-const.comp create mode 100644 reference/opt/shaders-opencl/comp/builtins.comp create mode 100644 reference/opt/shaders-opencl/comp/cfg-preserve-parameter.comp create mode 100644 reference/opt/shaders-opencl/comp/complex-type-alias.comp create mode 100644 reference/opt/shaders-opencl/comp/composite-construct.comp create mode 100644 reference/opt/shaders-opencl/comp/culling.comp create mode 100644 reference/opt/shaders-opencl/comp/defer-parens.comp create mode 100644 reference/opt/shaders-opencl/comp/dowhile.comp create mode 100644 reference/opt/shaders-opencl/comp/expect-assume.comp create mode 100644 reference/opt/shaders-opencl/comp/force-recompile-hooks.swizzle.comp create mode 100644 reference/opt/shaders-opencl/comp/functions.comp create mode 100644 reference/opt/shaders-opencl/comp/global-invocation-id.comp create mode 100644 reference/opt/shaders-opencl/comp/image.comp create mode 100644 reference/opt/shaders-opencl/comp/insert.comp create mode 100644 reference/opt/shaders-opencl/comp/local-invocation-id.comp create mode 100644 reference/opt/shaders-opencl/comp/local-invocation-index.comp create mode 100644 reference/opt/shaders-opencl/comp/local-size-duplicate-spec-id.comp create mode 100644 reference/opt/shaders-opencl/comp/mod.comp create mode 100644 reference/opt/shaders-opencl/comp/modf.comp create 
mode 100644 reference/opt/shaders-opencl/comp/outer-product.comp create mode 100644 reference/opt/shaders-opencl/comp/packing-test-1.comp create mode 100644 reference/opt/shaders-opencl/comp/packing-test-2.comp create mode 100644 reference/opt/shaders-opencl/comp/read-write-only.comp create mode 100644 reference/opt/shaders-opencl/comp/rmw-opt.comp create mode 100644 reference/opt/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-std450.double.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-struct-bool-cast.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-zero-init-simple.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-zero-init.comp create mode 100644 reference/opt/shaders-opencl/comp/shared.comp create mode 100644 reference/opt/shaders-opencl/comp/spec-constant-work-group-size.comp create mode 100644 reference/opt/shaders-opencl/comp/struct-layout.comp create mode 100644 reference/opt/shaders-opencl/comp/struct-nested.comp create mode 100644 reference/opt/shaders-opencl/comp/struct-packing.invalid.comp create mode 100644 reference/opt/shaders-opencl/comp/torture-loop.comp create mode 100644 reference/opt/shaders-opencl/comp/type-alias.comp create mode 100644 reference/opt/shaders-opencl/comp/udiv.comp create mode 100644 reference/opt/shaders-opencl/comp/writable-ssbo.comp diff --git a/reference/opt/shaders-opencl/asm/comp/atomic-decrement.asm.comp b/reference/opt/shaders-opencl/asm/comp/atomic-decrement.asm.comp new file mode 100644 index 000000000..8560908e5 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/atomic-decrement.asm.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct u0_counters +{ + uint c; +}; + +typedef struct u0_counters u0_counters; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(write_only image1d_buffer_t u0, __global uint* u0_counter) +{ + uint 
_24 = atomic_add(&(u0_counter[0]), (uint)(-1)); + write_imageui(u0, as_int(as_float(_24)), (uint4)(as_uint(as_int(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x)))); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/atomic-increment.asm.comp b/reference/opt/shaders-opencl/asm/comp/atomic-increment.asm.comp new file mode 100644 index 000000000..8ddebf840 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/atomic-increment.asm.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct u0_counters +{ + uint c; +}; + +typedef struct u0_counters u0_counters; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(write_only image1d_buffer_t u0, __global uint* u0_counter) +{ + uint _24 = atomic_add(&(u0_counter[0]), 1u); + write_imageui(u0, as_int(as_float(_24)), (uint4)(as_uint(as_int(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x)))); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/bitcast_iadd.asm.comp b/reference/opt/shaders-opencl/asm/comp/bitcast_iadd.asm.comp new file mode 100644 index 000000000..5c0520b3a --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/bitcast_iadd.asm.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + _6->_m0 = _5->_m1 + as_uint4(_5->_m0); + _6->_m0 = as_uint4(_5->_m0) + _5->_m1; + _6->_m0 = _5->_m1 + _5->_m1; + _6->_m0 = as_uint4(_5->_m0 + _5->_m0); + _6->_m1 = as_int4(_5->_m1 + _5->_m1); + _6->_m1 = _5->_m0 + _5->_m0; + _6->_m1 = as_int4(_5->_m1) + _5->_m0; + _6->_m1 = _5->_m0 + as_int4(_5->_m1); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/bitcast_icmp.asm.comp b/reference/opt/shaders-opencl/asm/comp/bitcast_icmp.asm.comp new file 
mode 100644 index 000000000..c2195a52c --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/bitcast_icmp.asm.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) < _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) <= _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 < as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 <= as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) > _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), as_int4(_5->_m1) >= _5->_m0); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 > as_uint4(_5->_m0)); + _6->_m0 = select((uint4)(0u), (uint4)(1u), _5->_m1 >= as_uint4(_5->_m0)); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/bitcast_sar.asm.comp b/reference/opt/shaders-opencl/asm/comp/bitcast_sar.asm.comp new file mode 100644 index 000000000..93916384b --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/bitcast_sar.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = as_uint4(as_int4(_23) >> _22); + _6->_m0 = as_uint4(_22 >> as_int4(_23)); + _6->_m0 = as_uint4(as_int4(_23) >> as_int4(_23)); + _6->_m0 = as_uint4(_22 >> _22); + _6->_m1 = as_int4(_23) >> as_int4(_23); + _6->_m1 = _22 >> _22; + _6->_m1 = as_int4(_23) >> _22; + _6->_m1 = _22 >> as_int4(_23); +} + 
diff --git a/reference/opt/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp b/reference/opt/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp new file mode 100644 index 000000000..f5a1a3a67 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/bitcast_sdiv.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = as_uint4(as_int4(_23) / _22); + _6->_m0 = as_uint4(_22 / as_int4(_23)); + _6->_m0 = as_uint4(as_int4(_23) / as_int4(_23)); + _6->_m0 = as_uint4(_22 / _22); + _6->_m1 = as_int4(_23) / as_int4(_23); + _6->_m1 = _22 / _22; + _6->_m1 = as_int4(_23) / _22; + _6->_m1 = _22 / as_int4(_23); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/bitcast_slr.asm.comp b/reference/opt/shaders-opencl/asm/comp/bitcast_slr.asm.comp new file mode 100644 index 000000000..525761cc2 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/bitcast_slr.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + int4 _m0; + uint4 _m1; +}; + +typedef struct _3 _3; + +struct _4 +{ + uint4 _m0; + int4 _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _3* _5, __global _4* _6) +{ + int4 _22 = _5->_m0; + uint4 _23 = _5->_m1; + _6->_m0 = _23 >> as_uint4(_22); + _6->_m0 = as_uint4(_22) >> _23; + _6->_m0 = _23 >> _23; + _6->_m0 = as_uint4(_22) >> as_uint4(_22); + _6->_m1 = as_int4(_23 >> _23); + _6->_m1 = as_int4(as_uint4(_22) >> as_uint4(_22)); + _6->_m1 = as_int4(_23 >> as_uint4(_22)); + _6->_m1 = as_int4(as_uint4(_22) >> _23); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/block-name-alias-global.asm.comp 
b/reference/opt/shaders-opencl/asm/comp/block-name-alias-global.asm.comp new file mode 100644 index 000000000..166f01b62 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/block-name-alias-global.asm.comp @@ -0,0 +1,48 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct A +{ + int a; + int b; +}; + +typedef struct A A; + +struct A_1 +{ + A Data[1]; +}; + +typedef struct A_1 A_1; + +struct A_2 +{ + A Data[1024]; +}; + +typedef struct A_2 A_2; + +struct B +{ + A Data[1]; +}; + +typedef struct B B; + +struct B_1 +{ + A Data[1024]; +}; + +typedef struct B_1 B_1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global A* C1, A_2 C2, __global A* C3, B_1 C4) +{ + C1[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a = C2.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + C1[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b = C2.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b; + C3[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a = C4.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; + C3[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b = C4.Data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].b; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp b/reference/opt/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp new file mode 100644 index 000000000..af86ed757 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/buffer-write-relative-addr.asm.comp @@ -0,0 +1,21 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb5_struct +{ + float4 _m0[5]; +}; + +typedef struct cb5_struct cb5_struct; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(cb5_struct cb0_5, write_only image1d_buffer_t u0) +{ + uint _41 = as_uint(as_float(as_int(((uint3)(get_local_id(0), 
get_local_id(1), get_local_id(2))).x) << 4)) >> 2u; + uint4 _50 = as_uint4(cb0_5._m0[as_uint(as_int(as_float(as_int(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x)))) + 1u]); + write_imageui(u0, as_int(_41), _50.xxxx); + write_imageui(u0, as_int(_41 + 1u), _50.yyyy); + write_imageui(u0, as_int(_41 + 2u), _50.zzzz); + write_imageui(u0, as_int(_41 + 3u), _50.wwww); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/buffer-write.asm.comp b/reference/opt/shaders-opencl/asm/comp/buffer-write.asm.comp new file mode 100644 index 000000000..ce88fd4e3 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/buffer-write.asm.comp @@ -0,0 +1,16 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb +{ + float value; +}; + +typedef struct cb cb; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(cb _8, write_only image1d_buffer_t _buffer) +{ + write_imagef(_buffer, as_int((32u * ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x) + ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))), (float4)(_8.value)); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp b/reference/opt/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp new file mode 100644 index 000000000..8da6f6cfa --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/copy-object-ssbo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _19 +{ +}; +typedef struct _19 _19; + +struct _5 +{ + int _m0; + _19 _m1; + _19 _m2; + int _m3; +}; + +typedef struct _5 _5; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _5* _3, __global _5* _4) +{ + *_4 = (*_3); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp b/reference/opt/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp new file mode 100644 index 
000000000..3ab995c11 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/copy-object-ubo-to-ssbo.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _19 +{ +}; +typedef struct _19 _19; + +struct _5 +{ + int _m0; + _19 _m1; + _19 _m2; + int _m3; +}; + +typedef struct _5 _5; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(_5 _3, __global _5* _4) +{ + *_4 = _3; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp b/reference/opt/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp new file mode 100644 index 000000000..177a60dc6 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/duplicate-spec-id.asm.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct StorageBuffer +{ + float values[1]; +}; + +typedef struct StorageBuffer StorageBuffer; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1 +#endif +constant int foo = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 2.0f +#endif +constant float bar = SPIRV_CROSS_CONSTANT_ID_0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float* ssbo) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + ssbo[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = convert_float(foo) + bar; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/fma.spv16.asm.comp b/reference/opt/shaders-opencl/asm/comp/fma.spv16.asm.comp new file mode 100644 index 000000000..9343d7f25 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/fma.spv16.asm.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + 
+__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _4, __global const float4* _6) +{ + _4[0] = fma(_6[0], _6[1], _6[1]); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp b/reference/opt/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp new file mode 100644 index 000000000..7135f7ae1 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/global-parameter-name-alias.asm.comp @@ -0,0 +1,8 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ +} + diff --git a/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp new file mode 100644 index 000000000..f7d65805e --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +void _main( uint3* id) +{ + float2 loaded = read_imagef(TargetTexture, as_int2((*id).xy)).xy; + float2 storeTemp = loaded + (float2)(1.0f); + write_imagef(TargetTexture, as_int2((*id).xy + (uint2)(1u)), (float4)(storeTemp)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(write_only image2d_t TargetTexture) +{ + uint3 id_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); + uint3 param = id_1; + _main(&param); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp b/reference/opt/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp new file mode 100644 index 000000000..b2059cd0d --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/op-spec-constant-op-vector-related.asm.comp @@ -0,0 +1,77 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _29 +{ + int _m0[3][3]; +}; + +typedef struct _29 _29; + +struct _7 +{ +
int _m0[1]; +}; + +typedef struct _7 _7; + +constant int3 _32 = (int3)(0); +constant int _33[3] = { 0, 0, 0 }; +constant int _34[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } }; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +constant int _3 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 0 +#endif +constant int _4 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 0 +#endif +constant int _5 = SPIRV_CROSS_CONSTANT_ID_2; +#define _36 ({ _3, 0, 0 }) +#define _37 ({ _3, _4, 0 }) +#define _38 ({ _3, _4, _5 }) +#define _39 ({ _4, 0, 0 }) +#define _40 ({ _4, _5, 0 }) +#define _41 ({ _4, _5, _3 }) +#define _42 ({ _5, 0, 0 }) +#define _43 ({ _5, _3, 0 }) +#define _44 ({ _5, _3, _4 }) +#define _45 ({ { _3, _4, _5 }, { 0, 0, 0 }, { 0, 0, 0 } }) +#define _46 ({ { _3, _4, _5 }, { _4, _5, _3 }, { 0, 0, 0 } }) +#define _47 ({ { _3, _4, _5 }, { _4, _5, _3 }, { _5, _3, _4 } }) +#define _48 ((_29){ { { _3, _4, _5 }, { _4, _5, _3 }, { _5, _3, _4 } } }) +#define _50 (_48._m0[0][0]) +#define _51 (_48._m0[1][0]) +#define _52 (_48._m0[0][1]) +#define _53 (_48._m0[2][2]) +#define _54 (_48._m0[2][0]) +#define _55 (_48._m0[1][1]) +#define _56 ((_50 == _51)) +#define _57 ((_52 == _53)) +#define _58 ((_54 == _55)) +#define _59 ((int)(_56)) +#define _60 ((int)(_57)) +#define _61 (_58 ? 
2 : 1) +#define _62 ((int3)(_3, 0, 0)) +#define _63 ((int3)(0, _4, 0)) +#define _64 ((int3)(0, 0, _5)) +#define _65 ((int3)(_62.x, 0, _62.z)) +#define _66 ((int3)(0, _63.y, _63.x)) +#define _67 ((int3)(_64.z, 0, _64.z)) +#define _68 ((int3)(_65.y, _65.x, _66.y)) +#define _69 ((int3)(_67.z, _68.y, _68.z)) +#define _70 (_69.x) +#define _71 (_69.y) +#define _72 (_69.z) +#define _73 ((_70 - _71)) +#define _74 ((_73 * _72)) + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _8, __global int* _9) +{ + _9[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] + ((((1 - _59) * _60) * (_61 - 1)) * _74); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/quantize.asm.comp b/reference/opt/shaders-opencl/asm/comp/quantize.asm.comp new file mode 100644 index 000000000..3743c7776 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/quantize.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO0 +{ + float scalar; + float2 vec2_val; + float3 vec3_val; + float4 vec4_val; +}; + +typedef struct SSBO0 SSBO0; + +uint spvPackHalf2x16(float2 v) { + uint r; + vstore_half(v.x, 0, (__private half *)&r); + vstore_half(v.y, 1, (__private half *)&r); + return r; +} + +float2 spvUnpackHalf2x16(uint u) { + const __private uint *p = &u; + return (float2)(vload_half(0, (const __private half *)p), + vload_half(1, (const __private half *)p)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO0* _12) +{ + _12->scalar = spvUnpackHalf2x16(spvPackHalf2x16((float2)(_12->scalar, 0.0f))).x; + _12->vec2_val = spvUnpackHalf2x16(spvPackHalf2x16(_12->vec2_val)); + _12->vec3_val = (float3)(spvUnpackHalf2x16(spvPackHalf2x16(_12->vec3_val.xy)), spvUnpackHalf2x16(spvPackHalf2x16((float2)(_12->vec3_val.z, 0.0f))).x); + _12->vec4_val = (float4)(spvUnpackHalf2x16(spvPackHalf2x16(_12->vec4_val.xy)), 
spvUnpackHalf2x16(spvPackHalf2x16(_12->vec4_val.zw))); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp b/reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp new file mode 100644 index 000000000..ddae4bb54 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + uint bar; + float3 baz; + uchar quux; + uchar4 blah; + half2 wibble; +}; + +typedef struct foo foo; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* _10) +{ + _10->bar = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x; + _10->baz = convert_float3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))); + _10->blah = convert_uchar4((uint4)(convert_uint4(_10->blah).xyz + ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))), 0u)); + _10->wibble = convert_half2(convert_float2(_10->wibble) * convert_float2(((uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2))).xy)); +} + diff --git a/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp b/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp new file mode 100644 index 000000000..545ecf547 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0.0f +#endif +constant float spec_const = SPIRV_CROSS_CONSTANT_ID_0; +constant float4 _20 = (float4)(spec_const); +constant float _26[8] = { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f }; + +struct UBO +{ + float uniform_float; +}; + +typedef struct UBO UBO; + +constant float _42 = 0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(UBO ubo) +{ + float4 a_1 = (float4)(0.0f); + float4 b_1 = (float4)(1.0f); + 
float4 c_1 = _20; + float4 _36 = (float4)(ubo.uniform_float); + float4 d_1 = _36; + float4 e_1 = _36; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp b/reference/opt/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp new file mode 100644 index 000000000..20235cb7f --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/specialization-constant-workgroup.asm.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a; +}; + +typedef struct SSBO SSBO; + +#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 9u +#endif +constant uint _19 = SPIRV_CROSS_CONSTANT_ID_10; +#ifndef SPIRV_CROSS_CONSTANT_ID_12 +#define SPIRV_CROSS_CONSTANT_ID_12 4u +#endif +constant uint _21 = SPIRV_CROSS_CONSTANT_ID_12; +constant uint3 spvWorkgroupSize = (uint3)(_19, 20u, _21); + +__attribute__((reqd_work_group_size(9, 20, 4))) +__kernel void comp_main(__global float* _6) +{ + _6[0] += 1.0f; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp b/reference/opt/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp new file mode 100644 index 000000000..853e0afac --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/struct-resource-name-aliasing.asm.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct bufA +{ + uint _data[1]; +}; + +typedef struct bufA bufA; + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(__global uint* bufA, __global uint* bufB) +{ + bufA[0] = 0u; + bufB[0] = 0u; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/uint_smulextended.asm.comp b/reference/opt/shaders-opencl/asm/comp/uint_smulextended.asm.comp new file mode 100644 index 000000000..ab2d4a703 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/uint_smulextended.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct 
_4 +{ + uint _m0[1]; +}; + +typedef struct _4 _4; + +struct _20 +{ + uint _m0; + uint _m1; +}; + +typedef struct _20 _20; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _5, __global uint* _6, __global uint* _7, __global uint* _8) +{ + _20 _28; + _28._m0 = _5[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] * _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + _28._m1 = mul_hi(_5[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x], _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]); + _7[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _28._m0; + _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _28._m1; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp b/reference/opt/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp new file mode 100644 index 000000000..53694c4b8 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/undefined-constant-composite.asm.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _20 +{ + int _m0; + int _m1; +}; + +typedef struct _20 _20; + +struct _5 +{ + int _m0[10]; +}; + +typedef struct _5 _5; + +struct _7 +{ + int _m0[10]; +}; + +typedef struct _7 _7; + +constant int _28 = 0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _6, __global int* _8) +{ + _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] + ((_20){ _28, 200 })._m1; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp b/reference/opt/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp new file mode 100644 index 000000000..852b7b315 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/undefined-spec-constant-composite.asm.comp @@ 
-0,0 +1,38 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _21 +{ + int _m0; + int _m1; +}; + +typedef struct _21 _21; + +struct _5 +{ + int _m0[10]; +}; + +typedef struct _5 _5; + +struct _7 +{ + int _m0[10]; +}; + +typedef struct _7 _7; + +constant int _29 = 0; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +constant int _9 = SPIRV_CROSS_CONSTANT_ID_0; +constant _21 _30 = (_21){ _9, _29 }; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _6, __global int* _8) +{ + _6[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = (_8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] + _30._m0) + ((_21){ _29, 200 })._m1; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/variable-pointers-2.asm.comp b/reference/opt/shaders-opencl/asm/comp/variable-pointers-2.asm.comp new file mode 100644 index 000000000..1e39d3aab --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/variable-pointers-2.asm.comp @@ -0,0 +1,56 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + int a[128]; + uint b; + float2 c; +}; + +typedef struct foo foo; + +struct bar +{ + int d; +}; + +typedef struct bar bar; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* buf, bar cb) +{ + uint3 _3 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + uint3 _4 = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))); + bool _71 = cb.d != 0; + __global foo* _72 = _71 ? buf : NULL; + __global foo* _67 = _72; + __global foo* _65 = _72; + __private uint3* _79 = _71 ? 
&_3 : &_4; + __private uint3* _74 = _79; + __global int* _49; + __global int* _52; + _49 = &_72->a[0u]; + _52 = &buf->a[0u]; + int _54; + int _55; + for (;;) + { + _54 = *_49; + _55 = *_52; + if (_54 != _55) + { + int _63 = (_54 + _55) + as_int((*_79).x); + *_49 = _63; + *_52 = _63; + _49 = &_49[1u]; + _52 = &_52[1u]; + continue; + } + else + { + break; + } + } +} + diff --git a/reference/opt/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/opt/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp new file mode 100644 index 000000000..1f27af228 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + int a; +}; + +typedef struct foo foo; + +struct bar +{ + int b; +}; + +typedef struct bar bar; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* x, __global int* y) +{ + __global int* _47 = (((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x != 0u) ? 
&x[0] : &y[0]; + __global int* _40 = _47; + __global int* _33 = _47; + int _37 = x[0]; + *_47 = 0; + y[0] = _37 + _37; +} + diff --git a/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp new file mode 100644 index 000000000..6afcb643c --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb1_struct +{ + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; +}; + +typedef struct cb1_struct cb1_struct; + +__attribute__((reqd_work_group_size(16, 16, 1))) +__kernel void comp_main(write_only image2d_t u0, cb1_struct cb0_1) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int2 _37 = (int2)(get_image_width(u0), get_image_height(u0)) >> as_int2((uint2)(4u)); + int _98; + _98 = 0; + for (; _98 < _37.y; _98++) + { + for (int _99 = 0; _99 < _37.x; ) + { + write_imagef(u0, (_37 * as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).xy) + (int2)(_98, _99), cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx); + _99++; + continue; + } + } +} + diff --git a/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp b/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp new file mode 100644 index 000000000..549b251a5 --- /dev/null +++ b/reference/opt/shaders-opencl/asm/comp/vector-builtin-type-cast.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct cb1_struct +{ + float4 _RESERVED_IDENTIFIER_FIXUP_m0[1]; +}; + +typedef struct cb1_struct cb1_struct; + +__attribute__((reqd_work_group_size(16, 16, 1))) +__kernel void comp_main(write_only image2d_t u0, cb1_struct cb0_1) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int2 _20 = (int2)(get_image_width(u0), get_image_height(u0)) >> 
as_int2((uint2)(4u)); + int _80; + _80 = 0; + for (; _80 < _20.y; _80++) + { + for (int _81 = 0; _81 < _20.x; ) + { + write_imagef(u0, (_20 * as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).xy) + (int2)(_80, _81), cb0_1._RESERVED_IDENTIFIER_FIXUP_m0[0].xxxx); + _81++; + continue; + } + } +} + diff --git a/reference/opt/shaders-opencl/comp/access-private-workgroup-in-function.comp b/reference/opt/shaders-opencl/comp/access-private-workgroup-in-function.comp new file mode 100644 index 000000000..25ff92694 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/access-private-workgroup-in-function.comp @@ -0,0 +1,9 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); +} + diff --git a/reference/opt/shaders-opencl/comp/arguments.comp b/reference/opt/shaders-opencl/comp/arguments.comp new file mode 100644 index 000000000..aa81c7a82 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/arguments.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Buf +{ + uint data[1]; +}; + +typedef struct Buf Buf; + +struct parameter +{ + uint n; +}; + +typedef struct parameter parameter; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global uint* _19, parameter p) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _19[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x + p.n; +} + diff --git a/reference/opt/shaders-opencl/comp/atomic.comp b/reference/opt/shaders-opencl/comp/atomic.comp new file mode 100644 index 000000000..5c5d824eb --- /dev/null +++ b/reference/opt/shaders-opencl/comp/atomic.comp @@ -0,0 +1,53 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO 
+{ + uint u32; + int i32; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* ssbo) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local uint shared_u32; + __local int shared_i32; + uint _16 = atomic_add(&(ssbo->u32), 1u); + uint _18 = atomic_or(&(ssbo->u32), 1u); + uint _20 = atomic_xor(&(ssbo->u32), 1u); + uint _22 = atomic_and(&(ssbo->u32), 1u); + uint _24 = atomic_min(&(ssbo->u32), 1u); + uint _26 = atomic_max(&(ssbo->u32), 1u); + uint _28 = atomic_xchg(&(ssbo->u32), 1u); + uint _32 = atomic_cmpxchg(&(ssbo->u32), 10u, 2u); + int _36 = atomic_add(&(ssbo->i32), 1); + int _38 = atomic_or(&(ssbo->i32), 1); + int _40 = atomic_xor(&(ssbo->i32), 1); + int _42 = atomic_and(&(ssbo->i32), 1); + int _44 = atomic_min(&(ssbo->i32), 1); + int _46 = atomic_max(&(ssbo->i32), 1); + int _48 = atomic_xchg(&(ssbo->i32), 1); + int _52 = atomic_cmpxchg(&(ssbo->i32), 10, 2); + shared_u32 = 10u; + shared_i32 = 10; + uint _57 = atomic_add(&shared_u32, 1u); + uint _58 = atomic_or(&shared_u32, 1u); + uint _59 = atomic_xor(&shared_u32, 1u); + uint _60 = atomic_and(&shared_u32, 1u); + uint _61 = atomic_min(&shared_u32, 1u); + uint _62 = atomic_max(&shared_u32, 1u); + uint _63 = atomic_xchg(&shared_u32, 1u); + uint _64 = atomic_cmpxchg(&shared_u32, 10u, 2u); + int _65 = atomic_add(&shared_i32, 1); + int _66 = atomic_or(&shared_i32, 1); + int _67 = atomic_xor(&shared_i32, 1); + int _68 = atomic_and(&shared_i32, 1); + int _69 = atomic_min(&shared_i32, 1); + int _70 = atomic_max(&shared_i32, 1); + int _71 = atomic_xchg(&shared_i32, 1); + int _72 = atomic_cmpxchg(&shared_i32, 10, 2); +} + diff --git a/reference/opt/shaders-opencl/comp/barriers.comp b/reference/opt/shaders-opencl/comp/barriers.comp new file mode 100644 index 000000000..1bd4de28a --- /dev/null +++ b/reference/opt/shaders-opencl/comp/barriers.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross 
(OpenCL backend) + + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + mem_fence(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + mem_fence(CLK_GLOBAL_MEM_FENCE); + mem_fence(CLK_GLOBAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); +} + diff --git a/reference/opt/shaders-opencl/comp/basic.comp b/reference/opt/shaders-opencl/comp/basic.comp new file mode 100644 index 000000000..1c6c16212 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/basic.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint counter; +}; + +typedef struct SSBO3 SSBO3; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _45, __global uint* _48) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 _29 = _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + if (dot(_29, (float4)(1.0f, 5.0f, 6.0f, 2.0f)) > 8.19999980926513671875f) + { + uint _52 = atomic_add(&(_48[0]), 1u); + _45[_52] = _29; + } +} + diff --git a/reference/opt/shaders-opencl/comp/basic.dispatchbase.comp b/reference/opt/shaders-opencl/comp/basic.dispatchbase.comp new file mode 100644 index 000000000..dfdb35d6f --- 
/dev/null +++ b/reference/opt/shaders-opencl/comp/basic.dispatchbase.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint counter; +}; + +typedef struct SSBO3 SSBO3; + +#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 1u +#endif +constant uint _59 = SPIRV_CROSS_CONSTANT_ID_10; +constant uint3 spvWorkgroupSize = (uint3)(_59, 1u, 1u); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _27, __global float4* _49, __global uint* _52) +{ + float4 _33 = _27[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + if (dot(_33, (float4)(1.0f, 5.0f, 6.0f, 2.0f)) > 8.19999980926513671875f) + { + uint _56 = atomic_add(&(_52[0]), 1u); + _49[_56] = _33; + } +} + diff --git a/reference/opt/shaders-opencl/comp/buffer-push-const.comp b/reference/opt/shaders-opencl/comp/buffer-push-const.comp new file mode 100644 index 000000000..aa81c7a82 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/buffer-push-const.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Buf +{ + uint data[1]; +}; + +typedef struct Buf Buf; + +struct parameter +{ + uint n; +}; + +typedef struct parameter parameter; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global uint* _19, parameter p) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _19[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x + p.n; +} + diff --git a/reference/opt/shaders-opencl/comp/builtins.comp b/reference/opt/shaders-opencl/comp/builtins.comp new file mode 100644 index 000000000..1d457fdfa --- /dev/null +++ b/reference/opt/shaders-opencl/comp/builtins.comp 
@@ -0,0 +1,9 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(8, 4, 2))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); +} + diff --git a/reference/opt/shaders-opencl/comp/cfg-preserve-parameter.comp b/reference/opt/shaders-opencl/comp/cfg-preserve-parameter.comp new file mode 100644 index 000000000..7135f7ae1 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/cfg-preserve-parameter.comp @@ -0,0 +1,8 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ +} + diff --git a/reference/opt/shaders-opencl/comp/complex-type-alias.comp b/reference/opt/shaders-opencl/comp/complex-type-alias.comp new file mode 100644 index 000000000..39e2347ae --- /dev/null +++ b/reference/opt/shaders-opencl/comp/complex-type-alias.comp @@ -0,0 +1,46 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Foo0 +{ + float a; +}; + +typedef struct Foo0 Foo0; + +struct Foo1 +{ + Foo0 a; +}; + +typedef struct Foo1 Foo1; + +struct Foo2 +{ + Foo1 a; + float weight; +}; + +typedef struct Foo2 Foo2; + +struct SSBO +{ + Foo2 outputs[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(__global Foo2* _53) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local Foo2 coeffs[64]; + coeffs[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = (Foo2){ (Foo1){ (Foo0){ 0.0f } }, 0.0f }; + barrier(CLK_LOCAL_MEM_FENCE); + if (((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) == 0u) + { + _53[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x].a.a.a = coeffs[0].a.a.a; + _53[((uint3)(get_group_id(0), get_group_id(1), 
get_group_id(2))).x].weight = coeffs[0].weight; + } +} + diff --git a/reference/opt/shaders-opencl/comp/composite-construct.comp b/reference/opt/shaders-opencl/comp/composite-construct.comp new file mode 100644 index 000000000..6f9957e3b --- /dev/null +++ b/reference/opt/shaders-opencl/comp/composite-construct.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO0 +{ + float4 as[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + float4 bs[1]; +}; + +typedef struct SSBO1 SSBO1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _16, __global float4* _32) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 values[2] = { _16[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x], _32[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] }; + _16[0] = values[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; + _32[1] = (float4)(40.0f); +} + diff --git a/reference/opt/shaders-opencl/comp/culling.comp b/reference/opt/shaders-opencl/comp/culling.comp new file mode 100644 index 000000000..93e215d06 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/culling.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO3 +{ + uint count; +}; + +typedef struct SSBO3 SSBO3; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _38, __global uint* _41) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float _28 = _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + if (_28 > 12.0f) + { + uint _45 = atomic_add(&(_41[0]), 1u); + 
_38[_45] = _28; + } +} + diff --git a/reference/opt/shaders-opencl/comp/defer-parens.comp b/reference/opt/shaders-opencl/comp/defer-parens.comp new file mode 100644 index 000000000..252986498 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/defer-parens.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 data; + int index; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _13) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 _17 = _13->data; + float2 _28 = _17.yz + (float2)(10.0f); + _13->data = (float4)(_17.x, _28, _17.w); + _13->data = (_17 + _17) + _17; + _13->data = _28.xxyy; + _13->data = (float4)(_28.y); + _13->data = (float4)((_17.zw + (float2)(10.0f))[_13->index]); +} + diff --git a/reference/opt/shaders-opencl/comp/dowhile.comp b/reference/opt/shaders-opencl/comp/dowhile.comp new file mode 100644 index 000000000..e5a51f6be --- /dev/null +++ b/reference/opt/shaders-opencl/comp/dowhile.comp @@ -0,0 +1,44 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 mvp; + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const SSBO* _28, __global float4* _52) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 _59; + int _60; + _60 = 0; + _59 = _28->in_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + float4 _42; + for (;;) + { + _42 = _28->mvp * _59; + int _44 = _60 + 1; + if (_44 < 16) + { + _60 = _44; + _59 = _42; + } + else + { + break; + } + } + _52[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _42; +} + diff --git a/reference/opt/shaders-opencl/comp/expect-assume.comp 
b/reference/opt/shaders-opencl/comp/expect-assume.comp new file mode 100644 index 000000000..a9415be79 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/expect-assume.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct buffer_t +{ + uint z; +}; + +typedef struct buffer_t buffer_t; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(__global uint* buf) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + buf[0] = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).z; +} + diff --git a/reference/opt/shaders-opencl/comp/force-recompile-hooks.swizzle.comp b/reference/opt/shaders-opencl/comp/force-recompile-hooks.swizzle.comp new file mode 100644 index 000000000..23990866b --- /dev/null +++ b/reference/opt/shaders-opencl/comp/force-recompile-hooks.swizzle.comp @@ -0,0 +1,11 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +const sampler_t spvDefaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(read_only image2d_t foo, write_only image2d_t bar) +{ + write_imagef(bar, (int2)(0), read_imagef(foo, spvDefaultSampler, (float2)(1.0f))); +} + diff --git a/reference/opt/shaders-opencl/comp/functions.comp b/reference/opt/shaders-opencl/comp/functions.comp new file mode 100644 index 000000000..7135f7ae1 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/functions.comp @@ -0,0 +1,8 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ +} + diff --git a/reference/opt/shaders-opencl/comp/global-invocation-id.comp b/reference/opt/shaders-opencl/comp/global-invocation-id.comp new file mode 100644 index 000000000..84693b0ee --- /dev/null +++ b/reference/opt/shaders-opencl/comp/global-invocation-id.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by 
SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] += 0.0199999995529651641845703125f; +} + diff --git a/reference/opt/shaders-opencl/comp/image.comp b/reference/opt/shaders-opencl/comp/image.comp new file mode 100644 index 000000000..da5e16cf5 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/image.comp @@ -0,0 +1,11 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(read_only image2d_t uImageIn, write_only image2d_t uImageOut) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int2 _23 = as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy); + write_imagef(uImageOut, _23, read_imagef(uImageIn, _23 + (int2)(get_image_width(uImageIn), get_image_height(uImageIn)))); +} + diff --git a/reference/opt/shaders-opencl/comp/insert.comp b/reference/opt/shaders-opencl/comp/insert.comp new file mode 100644 index 000000000..930313528 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/insert.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 out_data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _27) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _27[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = (float4)(10.0f, 30.0f, 70.0f, 90.0f); + _27[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x][1u] = 20.0f; +} + diff --git a/reference/opt/shaders-opencl/comp/local-invocation-id.comp 
b/reference/opt/shaders-opencl/comp/local-invocation-id.comp new file mode 100644 index 000000000..0def2374c --- /dev/null +++ b/reference/opt/shaders-opencl/comp/local-invocation-id.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x] += 0.0199999995529651641845703125f; +} + diff --git a/reference/opt/shaders-opencl/comp/local-invocation-index.comp b/reference/opt/shaders-opencl/comp/local-invocation-index.comp new file mode 100644 index 000000000..0a1a8ed3c --- /dev/null +++ b/reference/opt/shaders-opencl/comp/local-invocation-index.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += 0.0199999995529651641845703125f; +} + diff --git a/reference/opt/shaders-opencl/comp/local-size-duplicate-spec-id.comp b/reference/opt/shaders-opencl/comp/local-size-duplicate-spec-id.comp new file mode 100644 index 000000000..99b804f76 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/local-size-duplicate-spec-id.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct StorageBuffer +{ + uint values[1]; +}; + +typedef struct StorageBuffer StorageBuffer; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _22 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef 
SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1u +#endif +constant uint _23 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _24 = SPIRV_CROSS_CONSTANT_ID_2; +constant uint3 spvWorkgroupSize = (uint3)(_22, _23, _24); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* ssbo) +{ + ssbo[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = 1u; +} + diff --git a/reference/opt/shaders-opencl/comp/mod.comp b/reference/opt/shaders-opencl/comp/mod.comp new file mode 100644 index 000000000..c52e0e95f --- /dev/null +++ b/reference/opt/shaders-opencl/comp/mod.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _33) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 _29 = _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + _33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = fmod(_29, _33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]); + _33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = as_float4(as_uint4(_29) % as_uint4(_33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x])); + _33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = as_float4(as_int4(_29) % as_int4(_33[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x])); +} + diff --git a/reference/opt/shaders-opencl/comp/modf.comp b/reference/opt/shaders-opencl/comp/modf.comp new file mode 100644 index 000000000..de38e3aa2 --- /dev/null +++ 
b/reference/opt/shaders-opencl/comp/modf.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct ResType +{ + float4 _m0; + float4 _m1; +}; + +typedef struct ResType ResType; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const float4* _23, __global float4* _38) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + ResType _32; + _32._m0 = modf(_23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x], &_32._m1); + _38[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _32._m0; +} + diff --git a/reference/opt/shaders-opencl/comp/outer-product.comp b/reference/opt/shaders-opencl/comp/outer-product.comp new file mode 100644 index 000000000..4462fc221 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/outer-product.comp @@ -0,0 +1,45 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float2 m22; + float3 m23; + float4 m24; + float2 m32; + float3 m33; + float4 m34; + float2 m42; + float3 m43; + float4 m44; +}; + +typedef struct SSBO SSBO; + +struct ReadSSBO +{ + float2 v2; + float3 v3; + float4 v4; +}; + +typedef struct ReadSSBO ReadSSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _21, __global const ReadSSBO* _26) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float2 _29 = _26->v2; + _21->m22 = _29 * _29.x; + float3 _38 = _26->v3; + _21->m23 = _38 * _29.x; + float4 _47 = _26->v4; + _21->m24 = _47 * _29.x; + _21->m32 = _29 * _38.x; + _21->m33 = _38 * _38.x; + _21->m34 = _47 * _38.x; + _21->m42 = _29 * _47.x; + _21->m43 = _38 * _47.x; + _21->m44 = _47 * _47.x; +} + diff --git a/reference/opt/shaders-opencl/comp/packing-test-1.comp 
b/reference/opt/shaders-opencl/comp/packing-test-1.comp new file mode 100644 index 000000000..9955dd0ea --- /dev/null +++ b/reference/opt/shaders-opencl/comp/packing-test-1.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct T1 +{ + float3 a; + float b; +}; + +typedef struct T1 T1; + +struct Buffer0 +{ + T1 buf0[1]; +}; + +typedef struct Buffer0 Buffer0; + +struct Buffer1 +{ + float buf1[1]; +}; + +typedef struct Buffer1 Buffer1; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(__global T1* _15, __global float* _34) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _34[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _15[0].b; +} + diff --git a/reference/opt/shaders-opencl/comp/packing-test-2.comp b/reference/opt/shaders-opencl/comp/packing-test-2.comp new file mode 100644 index 000000000..224b89a54 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/packing-test-2.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct T1 +{ + float3 a; + float b; +}; + +typedef struct T1 T1; + +struct Buffer0 +{ + T1 buf0[1]; +}; + +typedef struct Buffer0 Buffer0; + +struct Buffer1 +{ + float buf1[1]; +}; + +typedef struct Buffer1 Buffer1; + +__attribute__((reqd_work_group_size(32, 1, 1))) +__kernel void comp_main(__global T1* _14, __global float* _24) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _24[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _14[0].b; +} + diff --git a/reference/opt/shaders-opencl/comp/read-write-only.comp b/reference/opt/shaders-opencl/comp/read-write-only.comp new file mode 100644 index 000000000..6b54b862b --- /dev/null +++ b/reference/opt/shaders-opencl/comp/read-write-only.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + float4 data4; + float4 data5; +}; + 
+typedef struct SSBO2 SSBO2; + +struct SSBO0 +{ + float4 data0; + float4 data1; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + float4 data2; + float4 data3; +}; + +typedef struct SSBO1 SSBO1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO2* _10, __global const SSBO0* _15, __global SSBO1* _21) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _10->data4 = _15->data0 + _21->data2; + _10->data5 = _15->data1 + _21->data3; +} + diff --git a/reference/opt/shaders-opencl/comp/rmw-opt.comp b/reference/opt/shaders-opencl/comp/rmw-opt.comp new file mode 100644 index 000000000..4127d311c --- /dev/null +++ b/reference/opt/shaders-opencl/comp/rmw-opt.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int a; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9[0] += 10; + _9[0] -= 10; + _9[0] *= 10; + _9[0] /= 10; + _9[0] = _9[0] << 2; + _9[0] = _9[0] >> 3; + _9[0] &= 40; + _9[0] ^= 10; + _9[0] %= 40; + _9[0] |= 1; + _9[0] = 0; +} + diff --git a/reference/opt/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp b/reference/opt/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp new file mode 100644 index 000000000..59f3fb7ed --- /dev/null +++ b/reference/opt/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a; + float b; + float c; + float d; + float e; + float f; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9->c = distance(_9->a, _9->b); + 
_9->d = length(_9->a); + _9->e = normalize(_9->a); + _9->f = distance(_9->a - 1.0f, _9->b - 2.0f); +} + diff --git a/reference/opt/shaders-opencl/comp/shared-std450.double.comp b/reference/opt/shaders-opencl/comp/shared-std450.double.comp new file mode 100644 index 000000000..5859d791c --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-std450.double.comp @@ -0,0 +1,29 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +struct SSBO +{ + double in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + double out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const double* _22, __global double* _44) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local double sShared[4]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _44[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = sShared[3u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; +} + diff --git a/reference/opt/shaders-opencl/comp/shared-struct-bool-cast.comp b/reference/opt/shaders-opencl/comp/shared-struct-bool-cast.comp new file mode 100644 index 000000000..68d589539 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-struct-bool-cast.comp @@ -0,0 +1,65 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct S1 +{ + int3 a; + uint2 b; + int4 c; + uint d; +}; + +typedef struct S1 S1; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _132) +{ + uint3 
spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a = (int3)(6, 8, 8); + s1.b = (uint2)(4u); + s1.c = (int4)(false, false, false, true); + s1.d = 6u; + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool _144 = all((int3)(6, 8, 8) == s1.a); + bool _108; + if (_144) + { + _108 = all((uint2)(4u) == s1.b); + } + else + { + _108 = _144; + } + bool _117; + if (_108) + { + _117 = all((int4)(false, false, false, true) == s1.c); + } + else + { + _117 = _108; + } + bool _126; + if (_117) + { + _126 = 6u == s1.d; + } + else + { + _126 = _117; + } + if (_126) + { + _132[0] += as_uint(1); + } +} + diff --git a/reference/opt/shaders-opencl/comp/shared-zero-init-simple.comp b/reference/opt/shaders-opencl/comp/shared-zero-init-simple.comp new file mode 100644 index 000000000..0bec24063 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-zero-init-simple.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _32) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared; + _32[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = sShared + _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; +} + diff --git a/reference/opt/shaders-opencl/comp/shared-zero-init.comp b/reference/opt/shaders-opencl/comp/shared-zero-init.comp new file mode 100644 index 000000000..b587d8f44 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-zero-init.comp @@ -0,0 +1,30 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + 
+constant float _31[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _48) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared[4]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _48[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = sShared[3u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; +} + diff --git a/reference/opt/shaders-opencl/comp/shared.comp b/reference/opt/shaders-opencl/comp/shared.comp new file mode 100644 index 000000000..836b2bf9f --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global const float* _22, __global float* _44) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float sShared[4]; + sShared[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + mem_fence(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + _44[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = sShared[3u - ((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) 
* get_local_size(0) + get_local_id(0)))]; +} + diff --git a/reference/opt/shaders-opencl/comp/spec-constant-work-group-size.comp b/reference/opt/shaders-opencl/comp/spec-constant-work-group-size.comp new file mode 100644 index 000000000..4bf86f53f --- /dev/null +++ b/reference/opt/shaders-opencl/comp/spec-constant-work-group-size.comp @@ -0,0 +1,39 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 2 +#endif +constant int b = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1 +#endif +constant int a = SPIRV_CROSS_CONSTANT_ID_0; + +struct SSBO +{ + int v[1]; +}; + +typedef struct SSBO SSBO; + +#define _21 ((as_uint(a) + 0u)) +#ifndef SPIRV_CROSS_CONSTANT_ID_10 +#define SPIRV_CROSS_CONSTANT_ID_10 1u +#endif +constant uint _22 = SPIRV_CROSS_CONSTANT_ID_10; +constant uint3 spvWorkgroupSize = (uint3)(_22, 20u, 1u); +#define _27 (spvWorkgroupSize.x) +#define _28 ((_21 + _27)) +#define _29 (spvWorkgroupSize.y) +#define _30 ((_28 + _29)) +#define _32 ((1 - a)) + +__attribute__((reqd_work_group_size(1, 20, 1))) +__kernel void comp_main(__global int* _17) +{ + int spec_const_array_size[b]; + spec_const_array_size[a] = a; + _17[_30] = b + spec_const_array_size[_32]; +} + diff --git a/reference/opt/shaders-opencl/comp/struct-layout.comp b/reference/opt/shaders-opencl/comp/struct-layout.comp new file mode 100644 index 000000000..39cabe2a8 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/struct-layout.comp @@ -0,0 +1,31 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Foo +{ + float4 m; +}; + +typedef struct Foo Foo; + +struct SSBO2 +{ + Foo out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + Foo in_data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Foo* _23, __global const Foo* _30) +{ + uint3 spvWorkgroupSize = 
(uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m = _30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m * _30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m; +} + diff --git a/reference/opt/shaders-opencl/comp/struct-nested.comp b/reference/opt/shaders-opencl/comp/struct-nested.comp new file mode 100644 index 000000000..264ad2ddb --- /dev/null +++ b/reference/opt/shaders-opencl/comp/struct-nested.comp @@ -0,0 +1,31 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct s1 +{ + int a; +}; + +typedef struct s1 s1; + +struct s2 +{ + s1 b; +}; + +typedef struct s2 s2; + +struct dstbuffer +{ + s2 test[1]; +}; + +typedef struct dstbuffer dstbuffer; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global s2* _19) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _19[0].b.a = 0; +} + diff --git a/reference/opt/shaders-opencl/comp/struct-packing.invalid.comp b/reference/opt/shaders-opencl/comp/struct-packing.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/opt/shaders-opencl/comp/torture-loop.comp b/reference/opt/shaders-opencl/comp/torture-loop.comp new file mode 100644 index 000000000..1ca9606c7 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/torture-loop.comp @@ -0,0 +1,46 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 mvp; + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const SSBO* _24, __global float4* _89) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 _101; + _101 = _24->in_data[((uint3)(get_global_id(0), get_global_id(1), 
get_global_id(2))).x]; + for (int _95 = 0; (_95 + 1) < 10; ) + { + _101 *= 2.0f; + _95 += 2; + continue; + } + float4 _100; + _100 = _101; + float4 _105; + for (uint _96 = 0u; _96 < 16u; _100 = _105, _96 += as_uint(1)) + { + _105 = _100; + for (uint _102 = 0u; _102 < 30u; ) + { + _105 = _24->mvp * _105; + _102 += as_uint(1); + continue; + } + } + _89[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _100; +} + diff --git a/reference/opt/shaders-opencl/comp/type-alias.comp b/reference/opt/shaders-opencl/comp/type-alias.comp new file mode 100644 index 000000000..32329cb1a --- /dev/null +++ b/reference/opt/shaders-opencl/comp/type-alias.comp @@ -0,0 +1,45 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct S0 +{ + float4 a; +}; + +typedef struct S0 S0; + +struct SSBO0 +{ + S0 s0s[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct S1 +{ + float4 a; +}; + +typedef struct S1 S1; + +struct SSBO1 +{ + S1 s1s[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO2 +{ + float4 outputs[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global S0* _36, __global S1* _55, __global float4* _66) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _66[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _36[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a + _55[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].a; +} + diff --git a/reference/opt/shaders-opencl/comp/udiv.comp b/reference/opt/shaders-opencl/comp/udiv.comp new file mode 100644 index 000000000..7e336b9b4 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/udiv.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + uint outputs[1]; +}; + +typedef struct SSBO2 SSBO2; + +struct SSBO +{ + uint inputs[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 
1, 1))) +__kernel void comp_main(__global uint* _10, __global uint* _23) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _10[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] / 29u; +} + diff --git a/reference/opt/shaders-opencl/comp/writable-ssbo.comp b/reference/opt/shaders-opencl/comp/writable-ssbo.comp new file mode 100644 index 000000000..30716e427 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/writable-ssbo.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b; +}; + +typedef struct myBlock myBlock; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b += 0.0199999995529651641845703125f; +} + From 6f820620b72c08a0ab7075bc2f8d1cf479057379 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Fri, 13 Mar 2026 13:49:12 -0700 Subject: [PATCH 04/16] OpenCL: Adding C API, cleanup --- .../asm/comp/variable-pointers-2.asm.comp | 9 +- ...ariable-pointers-store-forwarding.asm.comp | 5 +- .../vector-builtin-type-cast-func.asm.comp | 5 +- .../access-private-workgroup-in-function.comp | 10 +- spirv_cross_c.cpp | 36 +++++++ spirv_cross_c.h | 9 +- spirv_glsl.hpp | 12 +-- spirv_opencl.cpp | 93 +++++++++++++++---- spirv_opencl.hpp | 6 +- test_shaders.sh | 1 + 10 files changed, 144 insertions(+), 42 deletions(-) diff --git a/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp index 3bfb4fcbd..fa9b7c971 100644 --- a/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp +++ b/reference/shaders-opencl/asm/comp/variable-pointers-2.asm.comp @@ -22,15 +22,16 @@ __global foo* select_buffer(__global foo* a_1_1, bar cb) return (cb.d != 0) ? 
a_1_1 : NULL; } +#define _3 (*_3_ptr) +#define _4 (*_4_ptr) __private uint3* select_input(__private uint3* _3_ptr, __private uint3* _4_ptr, bar cb) { - #define _3 (*_3_ptr) - #define _4 (*_4_ptr) return (cb.d != 0) ? &_3 : &_4; - #undef _3 - #undef _4 } +#undef _3 +#undef _4 + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global foo* buf, bar cb) { diff --git a/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp b/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp index cbc654c1c..f9a3b49ec 100644 --- a/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp +++ b/reference/shaders-opencl/asm/comp/variable-pointers-store-forwarding.asm.comp @@ -15,13 +15,14 @@ struct bar typedef struct bar bar; +#define _3 (*_3_ptr) __global int* _24(__global foo* a_1, __global bar* b_1, __private uint3* _3_ptr) { - #define _3 (*_3_ptr) return (_3.x != 0u) ? &a_1->a : &b_1->b; - #undef _3 } +#undef _3 + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global int* x, __global int* y) { diff --git a/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp index d4f5be9be..61d0b595f 100644 --- a/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp +++ b/reference/shaders-opencl/asm/comp/vector-builtin-type-cast-func.asm.comp @@ -8,13 +8,14 @@ struct cb1_struct typedef struct cb1_struct cb1_struct; +#define _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID (*_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID_ptr) int2 get_texcoord( int2* base, int2* index, __private int3* _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID_ptr) { - #define _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID (*_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID_ptr) return ((*base) * as_int3(_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID).xy) + (*index); - #undef 
_RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID } +#undef _RESERVED_IDENTIFIER_FIXUP_gl_LocalInvocationID + __attribute__((reqd_work_group_size(16, 16, 1))) __kernel void comp_main(write_only image2d_t u0, cb1_struct cb0_1) { diff --git a/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp b/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp index 4aeedb66b..7a532044f 100644 --- a/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp +++ b/reference/shaders-opencl/comp/access-private-workgroup-in-function.comp @@ -1,20 +1,22 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +#define f (*f_ptr) void set_f(int* f_ptr) { - #define f (*f_ptr) f = 40; - #undef f } +#undef f + +#define u (*u_ptr) void set_shared_u(__local int* u_ptr) { - #define u (*u_ptr) u = 50; - #undef u } +#undef u + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main() { diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 4494700ed..f49366ac4 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -170,6 +170,9 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation #if SPIRV_CROSS_C_API_HLSL CompilerHLSL::Options hlsl; #endif +#if SPIRV_CROSS_C_API_OPENCL + CompilerOpenCL::Options opencl; +#endif }; struct spvc_set_s : ScratchMemoryAllocation @@ -394,6 +397,14 @@ spvc_result spvc_compiler_create_compiler_options(spvc_compiler compiler, spvc_c break; #endif +#if SPIRV_CROSS_C_API_OPENCL + case SPVC_BACKEND_OPENCL: + opt->backend_flags |= SPVC_COMPILER_OPTION_OPENCL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; + opt->glsl = static_cast(compiler->compiler.get())->get_common_options(); + opt->opencl = static_cast(compiler->compiler.get())->get_opencl_options(); + break; +#endif + #if SPIRV_CROSS_C_API_GLSL case SPVC_BACKEND_GLSL: opt->backend_flags |= SPVC_COMPILER_OPTION_GLSL_BIT | SPVC_COMPILER_OPTION_COMMON_BIT; @@ -797,6 +808,24 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, 
spvc_c break; #endif +#if SPIRV_CROSS_C_API_OPENCL + case SPVC_COMPILER_OPTION_OPENCL_VERSION: + options->opencl.opencl_version = value; + break; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP64: + options->opencl.enable_fp64 = value != 0; + break; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_64BIT_ATOMICS: + options->opencl.enable_64bit_atomics = value != 0; + break; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS: + options->opencl.enable_subgroups = value != 0; + break; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_SHUFFLE: + options->opencl.enable_shuffle = value != 0; + break; +#endif + default: options->context->report_error("Unknown option."); return SPVC_ERROR_INVALID_ARGUMENT; @@ -830,6 +859,13 @@ spvc_result spvc_compiler_install_compiler_options(spvc_compiler compiler, spvc_ break; #endif +#if SPIRV_CROSS_C_API_OPENCL + case SPVC_BACKEND_OPENCL: + static_cast(*compiler->compiler).set_common_options(options->glsl); + static_cast(*compiler->compiler).set_opencl_options(options->opencl); + break; +#endif + default: break; } diff --git a/spirv_cross_c.h b/spirv_cross_c.h index 76d2b8155..c59c299d0 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -272,7 +272,8 @@ extern "C" #define SPVC_COMPILER_OPTION_GLSL_BIT 0x2000000 #define SPVC_COMPILER_OPTION_HLSL_BIT 0x4000000 #define SPVC_COMPILER_OPTION_MSL_BIT 0x8000000 -#define SPVC_COMPILER_OPTION_LANG_BITS 0x0f000000 +#define SPVC_COMPILER_OPTION_OPENCL_BIT 0x10000000 +#define SPVC_COMPILER_OPTION_LANG_BITS 0x1f000000 #define SPVC_COMPILER_OPTION_ENUM_BITS 0xffffff #define SPVC_MAKE_MSL_VERSION(major, minor, patch) ((major) * 10000 + (minor) * 100 + (patch)) @@ -757,6 +758,12 @@ extern "C" SPVC_COMPILER_OPTION_HLSL_USER_SEMANTIC = 94 | SPVC_COMPILER_OPTION_HLSL_BIT, + SPVC_COMPILER_OPTION_OPENCL_VERSION = 95 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP64 = 96 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_64BIT_ATOMICS = 97 | SPVC_COMPILER_OPTION_OPENCL_BIT, 
+ SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS = 98 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_SHUFFLE = 99 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 4773595db..24e34d7b0 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -396,23 +396,13 @@ class CompilerGLSL : public Compiler static bool is_supported_subgroup_op_in_opengl(Op op, const uint32_t *ops); void reset(uint32_t iteration_count); - void emit_function(SPIRFunction &func, const Bitset &return_flags); + virtual void emit_function(SPIRFunction &func, const Bitset &return_flags); bool has_extension(const std::string &ext) const; void require_extension_internal(const std::string &ext); // Virtualize methods which need to be overridden by subclass targets like C++ and such. virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags); - // Called right after the opening { of a non-entry helper function body. - // Override to emit per-function preamble declarations (e.g. #define aliases). - virtual void emit_function_local_declarations(SPIRFunction &) - { - } - // Called right before the closing } of a non-entry helper function body. - // Override to clean up anything emitted by emit_function_local_declarations. - virtual void emit_function_local_epilogue(SPIRFunction &) - { - } SPIRBlock *current_emitting_block = nullptr; SmallVector current_emitting_switch_stack; diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 522ba7d92..435979c1b 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -159,6 +159,31 @@ void CompilerOpenCL::emit_header() statement("#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable"); statement(""); + // Emit FP_CONTRACT pragma based on ContractionOff execution mode and FPFastMathDefault. 
+ { + auto &ep = get_entry_point(); + bool contract = true; + + if (ep.flags.get(ExecutionModeContractionOff)) + contract = false; + + for (auto &fp_pair : ep.fp_fast_math_defaults) + { + if (fp_pair.second) + { + uint32_t flags = get(fp_pair.second).scalar(); + if (!(flags & FPFastMathModeAllowContractMask)) + contract = false; + } + } + + if (!contract) + { + statement("#pragma OPENCL FP_CONTRACT OFF"); + statement(""); + } + } + for (auto &header : header_lines) statement(header); if (!header_lines.empty()) @@ -1594,12 +1619,11 @@ void CompilerOpenCL::append_global_func_args(const SPIRFunction &func, uint32_t } } -void CompilerOpenCL::emit_function_local_declarations(SPIRFunction &func) +void CompilerOpenCL::emit_function(SPIRFunction &func, const Bitset &return_flags) { - // For helper functions that access workgroup/private global scalar variables via pointer params: - // emit #define var_name (*var_name_ptr) so that existing expressions (e.g. "u = 50;") - // transparently dereference the pointer parameter. + // Emit #define macros before the function for workgroup scalar pointer aliasing. auto wg_it = func_workgroup_args.find(func.self); + bool has_defines = false; if (wg_it != func_workgroup_args.end()) { for (auto var_id : wg_it->second) @@ -1608,24 +1632,38 @@ void CompilerOpenCL::emit_function_local_declarations(SPIRFunction &func) { auto var_name = to_name(var_id); statement("#define ", var_name, " (*", var_name, "_ptr)"); + has_defines = true; } } } -} -void CompilerOpenCL::emit_function_local_epilogue(SPIRFunction &func) -{ - auto wg_it = func_workgroup_args.find(func.self); - if (wg_it != func_workgroup_args.end()) + CompilerGLSL::emit_function(func, return_flags); + + // Emit #undef after the function. 
+ if (has_defines) { for (auto var_id : wg_it->second) { if (workgroup_scalar_vars.count(var_id)) statement("#undef ", to_name(var_id)); } + statement(""); } } +void CompilerOpenCL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get(member_type_id); + // OpenCL C does not use GLSL layout qualifiers or interpolation qualifiers. + statement(qualifier, variable_decl(membertype, to_member_name(type, index)), ";"); +} + +void CompilerOpenCL::emit_block_hints(const SPIRBlock &) +{ + // OpenCL C has no control-flow hint attributes; suppress SPIRV_CROSS_BRANCH/FLATTEN etc. +} + void CompilerOpenCL::emit_specialization_constants_and_structs() { bool emitted = false; @@ -1850,7 +1888,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) auto opencl_atomic = [this, ops](const char *opencl_op) { if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], opencl_op); }; @@ -2091,12 +2129,12 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicExchange: if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], "atomic_xchg"); break; case OpAtomicCompareExchange: if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); // OpenCL atomic_cmpxchg(&ptr, expected, desired) forced_temporaries.insert(ops[1]); emit_op(ops[0], ops[1], @@ -2112,7 +2150,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicISub: { if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics 
not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); forced_temporaries.insert(ops[1]); auto expr = join("atomic_sub(", to_atomic_ptr_expression(ops[2]), ", ", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); @@ -2139,7 +2177,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicLoad: { if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); auto &type = expression_type(ops[2]); forced_temporaries.insert(ops[1]); bool unsigned_type = (type.basetype == SPIRType::UInt); @@ -2151,7 +2189,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicStore: { if (check_atomic_image(ops[0])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); statement("atomic_xchg(", to_atomic_ptr_expression(ops[0]), ", ", to_expression(ops[3]), ");"); flush_all_atomic_capable_variables(); break; @@ -2160,7 +2198,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpAtomicIDecrement: { if (check_atomic_image(ops[2])) - SPIRV_CROSS_THROW("Image atomics not yet implemented for OpenCL."); + SPIRV_CROSS_THROW("Image atomics are not supported in OpenCL."); forced_temporaries.insert(ops[1]); auto &type = expression_type(ops[2]); bool unsigned_type = (type.basetype == SPIRType::UInt); @@ -2656,6 +2694,29 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } + case OpPtrEqual: + case OpPtrNotEqual: + case OpPtrDiff: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + const char *op = ""; + if (opcode == OpPtrEqual) + op = "=="; + else if (opcode == OpPtrNotEqual) + op = "!="; + else if (opcode == 
OpPtrDiff) + op = "-"; + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(to_pointer_expression(op0), " ", op, " ", to_pointer_expression(op1)), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 90a4291e0..defe75032 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -126,8 +126,10 @@ class CompilerOpenCL : public CompilerGLSL uint32_t get_physical_type_id_stride(TypeID type_id) const override; void replace_illegal_names() override; - void emit_function_local_declarations(SPIRFunction &func) override; - void emit_function_local_epilogue(SPIRFunction &func) override; + void emit_function(SPIRFunction &func, const Bitset &return_flags) override; + void emit_block_hints(const SPIRBlock &block) override; + void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const std::string &qualifier = "", uint32_t base_offset = 0) override; Options opencl_options; diff --git a/test_shaders.sh b/test_shaders.sh index a054710ed..a6dd41c14 100755 --- a/test_shaders.sh +++ b/test_shaders.sh @@ -23,6 +23,7 @@ echo "Using SPIRV-Cross in: \"$SPIRV_CROSS_PATH\"." 
./test_shaders.py shaders-msl-no-opt ${OPTS} --msl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-opencl ${OPTS} --opencl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-opencl ${OPTS} --opencl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 +./test_shaders.py shaders-opencl-no-opt ${OPTS} --opencl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl ${OPTS} --hlsl --opt --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 ./test_shaders.py shaders-hlsl-no-opt ${OPTS} --hlsl --spirv-cross "$SPIRV_CROSS_PATH" || exit 1 From 6586da175224b9d8eab2d12f3cdb77dc221d1d0c Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Fri, 13 Mar 2026 22:30:11 -0700 Subject: [PATCH 05/16] OpenCL: more GLSL mappings --- spirv_opencl.cpp | 590 ++++++++++++++++++++++++++++++++++++++++++++++- spirv_opencl.hpp | 2 + 2 files changed, 591 insertions(+), 1 deletion(-) diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 435979c1b..5a72f14e4 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -477,6 +477,19 @@ void CompilerOpenCL::emit_resources() statement(""); } + // Polyfill for bitfieldReverse (32-bit scalar only — vectors call per-component). + if (needs_bitreverse_polyfill) + { + statement("uint spvBitReverse(uint v) {"); + statement(" v = ((v >> 1u) & 0x55555555u) | ((v & 0x55555555u) << 1u);"); + statement(" v = ((v >> 2u) & 0x33333333u) | ((v & 0x33333333u) << 2u);"); + statement(" v = ((v >> 4u) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4u);"); + statement(" v = ((v >> 8u) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8u);"); + statement(" return (v >> 16u) | (v << 16u);"); + statement("}"); + statement(""); + } + // Default sampler for combined image+sampler usage (OpenCL C requires file-scope const sampler_t). 
if (needs_default_sampler) { @@ -854,6 +867,9 @@ string CompilerOpenCL::get_type_address_space(const SPIRType &type, uint32_t id, case StorageClassWorkgroup: addr_space = "__local"; break; + case StorageClassPhysicalStorageBuffer: + addr_space = "__global"; + break; case StorageClassInput: // Input builtins materialized as __private local variables. addr_space = "__private"; @@ -1209,6 +1225,132 @@ void CompilerOpenCL::emit_glsl_op(uint32_t result_type, uint32_t result_id, uint emit_unary_func_op(result_type, result_id, args[0], "spvUnpackHalf2x16"); break; + case GLSLstd450SAbs: + { + // OpenCL abs() on integer types returns unsigned. Need bitcast back to signed if result is signed. + auto &out_type = get(result_type); + auto &expr_type = expression_type(args[0]); + + // Cast input to signed if needed. + string input_expr; + auto expected_basetype = to_signed_basetype(expr_type.width); + if (expr_type.basetype != expected_basetype) + input_expr = bitcast_expression(expected_basetype, args[0]); + else + input_expr = to_expression(args[0]); + + string expr = join("abs(", input_expr, ")"); + + // abs() returns unsigned in OpenCL. Cast to result type if it's signed. + auto unsigned_basetype = to_unsigned_basetype(expr_type.width); + if (out_type.basetype != unsigned_basetype) + { + // Build the unsigned return type to bitcast from. + SPIRType abs_ret_type = out_type; + abs_ret_type.basetype = unsigned_basetype; + expr = join(bitcast_glsl_op(out_type, abs_ret_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450SSign: + { + // OpenCL has no integer sign(). Use clamp(x, -1, 1). 
+ auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + + auto expected_basetype = to_signed_basetype(expr_type.width); + string input_expr; + if (expr_type.basetype != expected_basetype) + input_expr = bitcast_expression(expected_basetype, args[0]); + else + input_expr = to_expression(args[0]); + + string expr = join("clamp(", input_expr, ", -1, 1)"); + + // Cast to result type if needed (e.g. result is unsigned). + if (out_type.basetype != expected_basetype) + { + SPIRType signed_type = out_type; + signed_type.basetype = expected_basetype; + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450FindSMsb: + { + // GLSL findMSB for signed: position of highest bit that differs from sign bit. + // OpenCL: (W-1) - clz(x ^ (x >> (W-1))) + // x >> (W-1) is arithmetic shift: 0 for positive, -1 for negative. + // x ^ -1 = ~x, x ^ 0 = x. So this gives clz(x) for positive, clz(~x) for negative. + auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + uint32_t width = expr_type.width; + + // Input must be signed for arithmetic right shift. + auto signed_basetype = to_signed_basetype(width); + SPIRType signed_type = expr_type; + signed_type.basetype = signed_basetype; + + string input_expr; + if (expr_type.basetype != signed_basetype) + input_expr = bitcast_expression(signed_basetype, args[0]); + else + input_expr = to_enclosed_expression(args[0]); + + string xor_expr = join(input_expr, " ^ (", input_expr, " >> ", width - 1, ")"); + string expr = join(width - 1, " - clz(", xor_expr, ")"); + + // clz on signed type returns signed, so result is signed. Cast if output is unsigned. 
+ if (out_type.basetype != signed_basetype) + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450FindUMsb: + { + // GLSL findMSB for unsigned: position of highest set bit, -1 for 0. + // OpenCL: (W-1) - clz(x). clz(0) = W, so result = -1 for 0. + auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + uint32_t width = expr_type.width; + + auto unsigned_basetype = to_unsigned_basetype(width); + string input_expr; + if (expr_type.basetype != unsigned_basetype) + input_expr = bitcast_expression(unsigned_basetype, args[0]); + else + input_expr = to_expression(args[0]); + + // Cast to signed for the subtraction so result can be -1. + auto signed_basetype = to_signed_basetype(width); + SPIRType signed_type = out_type; + signed_type.basetype = signed_basetype; + string clz_expr = join("as_", type_to_glsl(signed_type), "(clz(", input_expr, "))"); + + string expr = join(width - 1, " - ", clz_expr); + + // findMSB returns int (signed). Cast if output type differs. + if (out_type.basetype != signed_basetype) + { + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + default: CompilerGLSL::emit_glsl_op(result_type, result_id, op, args, count); break; @@ -1223,6 +1365,13 @@ std::string CompilerOpenCL::bitcast_glsl_op(const SPIRType &out_type, const SPIR if (out_type.basetype == in_type.basetype) return ""; + // Pointer types are handled by emit_instruction for OpBitcast. + // If we get here as a fallback, use a simple C-style cast. + if (is_pointer(out_type)) + return join("(", type_to_glsl(out_type), ")"); + if (is_pointer(in_type)) + return "as_ulong"; + // All bitcasts (float↔int, int↔uint, half↔short, etc.) 
use as_TYPE() in OpenCL C. // type_to_glsl gives us the full type name including vector size (e.g. "float4", "uint"). auto out_name = type_to_glsl(out_type); @@ -1656,7 +1805,16 @@ void CompilerOpenCL::emit_struct_member(const SPIRType &type, uint32_t member_ty { auto &membertype = get(member_type_id); // OpenCL C does not use GLSL layout qualifiers or interpolation qualifiers. - statement(qualifier, variable_decl(membertype, to_member_name(type, index)), ";"); + // PhysicalStorageBuffer pointers in structs must be emitted as ulong since + // OpenCL C does not allow pointer types in kernel parameter structs. + if (is_pointer(membertype) && membertype.storage == StorageClassPhysicalStorageBuffer) + { + statement(qualifier, "ulong ", to_member_name(type, index), ";"); + } + else + { + statement(qualifier, variable_decl(membertype, to_member_name(type, index)), ";"); + } } void CompilerOpenCL::emit_block_hints(const SPIRBlock &) @@ -1947,6 +2105,26 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) inherit_expression_dependencies(result_id, ptr); break; } + // When loading a PhysicalStorageBuffer pointer from a struct member that was + // emitted as ulong (because OpenCL doesn't allow pointer types in kernel struct params), + // cast the loaded ulong value to the typed pointer. 
+ { + auto &result_type_obj = get(ops[0]); + if (is_pointer(result_type_obj) && result_type_obj.storage == StorageClassPhysicalStorageBuffer) + { + auto *expr = maybe_get(ptr); + if (expr && expr->access_chain) + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + auto ptr_type_str = type_to_glsl(result_type_obj); + emit_op(result_type, result_id, join("((", ptr_type_str, ")(", to_expression(ptr), "))"), + should_forward(ptr)); + inherit_expression_dependencies(result_id, ptr); + break; + } + } + } CompilerGLSL::emit_instruction(instruction); break; } @@ -2210,6 +2388,269 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) flush_all_atomic_capable_variables(); break; } + case OpBitCount: + { + // GLSL bitCount → OpenCL popcount. + // popcount returns the same type as its input in OpenCL (unlike GLSL which returns int). + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t arg = ops[2]; + auto &in_type = expression_type(arg); + auto &out_type = get(result_type); + + string expr = join("popcount(", to_expression(arg), ")"); + + // Cast result if types differ (e.g. popcount(int4) → uint4 needs as_uint4). + if (out_type.basetype != in_type.basetype) + { + expr = join(bitcast_glsl_op(out_type, in_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(arg)); + inherit_expression_dependencies(result_id, arg); + break; + } + + case OpBitReverse: + { + // GLSL bitfieldReverse → no OpenCL builtin. + // Use scalar polyfill, call per-component for vectors. + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t arg = ops[2]; + auto &type = get(result_type); + + if (!needs_bitreverse_polyfill) + { + needs_bitreverse_polyfill = true; + force_recompile(); + } + + auto unsigned_basetype = to_unsigned_basetype(type.width); + string input_expr = bitcast_expression(unsigned_basetype, arg); + + string expr; + if (type.vecsize > 1) + { + // Call scalar polyfill per component. 
+ SPIRType uint_type = type; + uint_type.basetype = unsigned_basetype; + expr = join("(", type_to_glsl(uint_type), ")("); + for (uint32_t i = 0; i < type.vecsize; i++) + { + if (i > 0) + expr += ", "; + expr += join("spvBitReverse(", input_expr, ".s", i, ")"); + } + expr += ")"; + } + else + expr = join("spvBitReverse(", input_expr, ")"); + + // Cast back to signed if needed. + if (type.basetype != unsigned_basetype) + { + SPIRType uint_type = type; + uint_type.basetype = unsigned_basetype; + expr = join(bitcast_glsl_op(type, uint_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(arg)); + inherit_expression_dependencies(result_id, arg); + break; + } + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + { + // GLSL bitfieldExtract(value, offset, bits) → OpenCL: manual extraction. + // Unsigned: (value >> offset) & ((1u << bits) - 1u) + // Signed: (int)((value >> offset) << (W - bits)) >> (W - bits) [arithmetic shift for sign-extend] + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t value = ops[2]; + uint32_t offset_id = ops[3]; + uint32_t bits_id = ops[4]; + auto &type = get(result_type); + uint32_t width = type.width; + + bool is_signed_extract = (opcode == OpBitFieldSExtract); + + if (is_signed_extract) + { + auto signed_basetype = to_signed_basetype(width); + string val_expr = bitcast_expression(signed_basetype, value); + // Sign-extending extract: shift left to put field at MSB, then arithmetic shift right. + // result = (val << (W - bits - offset)) >> (W - bits) + // Simplified: extract bits, then sign-extend. 
+ string expr = join("((", val_expr, ") << (", width, " - ", to_enclosed_expression(bits_id), " - ", + to_enclosed_expression(offset_id), ")) >> (", width, " - ", to_enclosed_expression(bits_id), ")"); + + if (type.basetype != signed_basetype) + { + SPIRType signed_type = type; + signed_type.basetype = signed_basetype; + expr = join(bitcast_glsl_op(type, signed_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(value)); + } + else + { + auto unsigned_basetype = to_unsigned_basetype(width); + string val_expr = bitcast_expression(unsigned_basetype, value); + SPIRType uint_type = type; + uint_type.basetype = unsigned_basetype; + auto utype = type_to_glsl(uint_type); // Also types the mask literals, so the mask is built at the operand's width. + string expr = join("((", val_expr, ") >> ", to_enclosed_expression(offset_id), ") & (((", utype, ")1 << ", + to_enclosed_expression(bits_id), ") - (", utype, ")1)"); + + if (type.basetype != unsigned_basetype) + expr = join(bitcast_glsl_op(type, uint_type), "(", expr, ")"); + + emit_op(result_type, result_id, expr, should_forward(value)); + } + inherit_expression_dependencies(result_id, value); + inherit_expression_dependencies(result_id, offset_id); + inherit_expression_dependencies(result_id, bits_id); + break; + } + + case OpBitFieldInsert: + { + // GLSL bitfieldInsert(base, insert, offset, bits) → OpenCL: manual insertion.
+ // mask = (((T)1 << bits) - (T)1) << offset, where T is the unsigned operand type (operands are parenthesized) + // result = (base & ~mask) | ((insert << offset) & mask) + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t base_id = ops[2]; + uint32_t insert_id = ops[3]; + uint32_t offset_id = ops[4]; + uint32_t bits_id = ops[5]; + auto &type = get(result_type); + + auto unsigned_basetype = to_unsigned_basetype(type.width); + string base_expr = bitcast_expression(unsigned_basetype, base_id); + string insert_expr = bitcast_expression(unsigned_basetype, insert_id); + + SPIRType uint_type = type; + uint_type.basetype = unsigned_basetype; + auto utype = type_to_glsl(uint_type); + + string mask = + join("(((", utype, ")1 << ", to_enclosed_expression(bits_id), ") - (", utype, ")1) << ", to_enclosed_expression(offset_id)); + string expr = join("((", base_expr, ") & ~(", mask, ")) | (((", insert_expr, ") << ", to_enclosed_expression(offset_id), + ") & (", mask, "))"); + + if (type.basetype != unsigned_basetype) + expr = join(bitcast_glsl_op(type, uint_type), "(", expr, ")"); + + emit_op(result_type, result_id, expr, should_forward(base_id) && should_forward(insert_id)); + inherit_expression_dependencies(result_id, base_id); + inherit_expression_dependencies(result_id, insert_id); + inherit_expression_dependencies(result_id, offset_id); + inherit_expression_dependencies(result_id, bits_id); + break; + } + + case OpBitcast: + { + auto &out_type = get(ops[0]); + auto &in_type = expression_type(ops[2]); + + // Bitcast involving pointer types needs special handling in OpenCL C. + if (is_pointer(out_type) || is_pointer(in_type)) + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t arg = ops[2]; + + string expr; + if (is_pointer(out_type) && !is_pointer(in_type)) + { + // Non-pointer → pointer: cast via ulong if input is a vector (e.g. uvec2).
+ auto ptr_type_str = type_to_glsl(out_type); + if (in_type.vecsize > 1) + expr = join("((", ptr_type_str, ")as_ulong(", to_expression(arg), "))"); + else + expr = join("((", ptr_type_str, ")(", to_expression(arg), "))"); + } + else if (!is_pointer(out_type) && is_pointer(in_type)) + { + // Pointer → non-pointer: cast to ulong, then to target type. + if (out_type.vecsize > 1) + expr = join("as_", type_to_glsl(out_type), "((ulong)(", to_expression(arg), "))"); + else + expr = join("(", type_to_glsl(out_type), ")((ulong)(", to_expression(arg), "))"); + } + else + { + // Pointer → pointer: direct C-style cast. + expr = join("((", type_to_glsl(out_type), ")(", to_expression(arg), "))"); + } + + emit_op(result_type, result_id, std::move(expr), should_forward(arg)); + inherit_expression_dependencies(result_id, arg); + break; + } + + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpPtrAccessChain: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t base_id = ops[2]; + + auto &base_type = expression_type(base_id); + TypeID base_type_id = expression_type_id(base_id); + + // Check if custom stride pointer arithmetic is needed. + if (has_decoration(base_type_id, DecorationArrayStride)) + { + TypeID pointee_type_id = get_pointee_type_id(base_type_id); + uint32_t physical_stride = get_physical_type_id_stride(pointee_type_id); + uint32_t requested_stride = get_decoration(base_type_id, DecorationArrayStride); + + if (physical_stride != requested_stride) + { + // Custom stride: use pointer arithmetic via ulong cast. 
+ // *((__global T*)((ulong)ptr + index * stride)) + uint32_t index_id = ops[3]; + auto &pointee_type = get(pointee_type_id); + auto &ptr_type = get(base_type_id); + auto addr_space = get_type_address_space(ptr_type, 0); + + string base_expr = to_enclosed_expression(base_id); + string intptr_expr = + join("(ulong)(", base_expr, ") + ", to_enclosed_expression(index_id), " * ", requested_stride); + string ptr_cast = join("(", addr_space, " ", type_to_glsl(pointee_type), "*)(", intptr_expr, ")"); + string expr = join("*(", ptr_cast, ")"); + + auto &e = set(result_id, std::move(expr), result_type, should_forward(base_id)); + auto *backing_var = maybe_get_backing_variable(base_id); + e.loaded_from = backing_var ? backing_var->self : ID(base_id); + e.access_chain = true; + forwarded_temporaries.insert(result_id); + suppressed_usage_tracking.insert(result_id); + inherit_expression_dependencies(result_id, base_id); + inherit_expression_dependencies(result_id, index_id); + + // Mark as packed if the vector stride differs from natural alignment. + if (is_vector(pointee_type) && requested_stride != physical_stride) + set_extended_decoration(result_id, SPIRVCrossDecorationPhysicalTypePacked); + + break; + } + } + + // No custom stride — fall through to base class. 
+ CompilerGLSL::emit_instruction(instruction); + break; + } + case OpAccessChain: case OpInBoundsAccessChain: { @@ -2260,6 +2701,16 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) } handled = true; } + else if (length == 5 && !is_single_member && struct_type && !struct_type->array.empty()) + { + // Array of multi-member SSBOs: ptr[array_idx].member_name + // ops[3] = array index (dynamic), ops[4] = member index (constant) + uint32_t mbr_idx = get(ops[4]).scalar(); + auto mbr_name = to_member_name(*struct_type, mbr_idx); + expr = join(to_name(base_id), "[", to_expression(ops[3]), "].", mbr_name); + is_subscript_deref = true; + handled = true; + } else if (length == 5 && !is_single_member && struct_type) { // Multi-member SSBO: ptr->member_name[element_idx] @@ -2277,6 +2728,13 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) is_subscript_deref = true; handled = true; } + else if (length == 4 && !is_single_member && struct_type && !struct_type->array.empty()) + { + // Array of multi-member SSBOs: ptr[array_idx] (result is struct) + expr = join(to_name(base_id), "[", to_expression(ops[3]), "]"); + is_subscript_deref = true; + handled = true; + } else if (length == 4 && !is_single_member && struct_type) { // Multi-member SSBO: ptr->member_name (lvalue, not address-of) @@ -2717,6 +3175,136 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } + case OpSDot: + case OpUDot: + case OpSUDot: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec1 = ops[2]; + uint32_t vec2 = ops[3]; + + auto &input_type1 = expression_type(vec1); + auto &input_type2 = expression_type(vec2); + auto &type = get(result_type); + + string vec1input, vec2input; + uint32_t input_size = input_type1.vecsize; + + if (instruction.length == 5) + { + if (ops[4] == PackedVectorFormatPackedVectorFormat4x8Bit) + { + string type1 = opcode == OpSDot || opcode == OpSUDot ? 
"char4" : "uchar4"; + vec1input = join("as_", type1, "(", to_expression(vec1), ")"); + string type2 = opcode == OpSDot ? "char4" : "uchar4"; + vec2input = join("as_", type2, "(", to_expression(vec2), ")"); + input_size = 4; + } + else + SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product is not supported."); + } + else + { + SPIRType::BaseType vec1_expected_type = + opcode != OpUDot ? to_signed_basetype(input_type1.width) : to_unsigned_basetype(input_type1.width); + SPIRType::BaseType vec2_expected_type = + opcode != OpSDot ? to_unsigned_basetype(input_type2.width) : to_signed_basetype(input_type2.width); + + vec1input = bitcast_expression(vec1_expected_type, vec1); + vec2input = bitcast_expression(vec2_expected_type, vec2); + } + + // Emit inline sum of component-wise products: + // (result_type)(a.s0) * (result_type)(b.s0) + ... + (result_type)(a.sN) * (result_type)(b.sN) + auto result_type_str = type_to_glsl(type); + string exp; + for (uint32_t i = 0; i < input_size; i++) + { + if (i > 0) + exp += " + "; + string comp = input_size > 1 ? join(".s", i) : ""; + exp += + join("(", result_type_str, ")(", vec1input, comp, ") * (", result_type_str, ")(", vec2input, comp, ")"); + } + + emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2)); + inherit_expression_dependencies(id, vec1); + inherit_expression_dependencies(id, vec2); + break; + } + + case OpSDotAccSat: + case OpUDotAccSat: + case OpSUDotAccSat: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec1 = ops[2]; + uint32_t vec2 = ops[3]; + uint32_t acc = ops[4]; + + auto input_type1 = expression_type(vec1); + auto input_type2 = expression_type(vec2); + auto &type = get(result_type); + + string vec1input, vec2input; + uint32_t input_size = input_type1.vecsize; + + if (instruction.length == 6) + { + if (ops[5] == PackedVectorFormatPackedVectorFormat4x8Bit) + { + string type1 = opcode == OpSDotAccSat || opcode == OpSUDotAccSat ? 
"char4" : "uchar4"; + vec1input = join("as_", type1, "(", to_expression(vec1), ")"); + string type2 = opcode == OpSDotAccSat ? "char4" : "uchar4"; + vec2input = join("as_", type2, "(", to_expression(vec2), ")"); + input_size = 4; + } + else + SPIRV_CROSS_THROW("Packed vector formats other than 4x8Bit for integer dot product is not supported."); + } + else + { + SPIRType::BaseType vec1_expected_type = opcode != OpUDotAccSat ? to_signed_basetype(input_type1.width) : + to_unsigned_basetype(input_type1.width); + SPIRType::BaseType vec2_expected_type = opcode != OpSDotAccSat ? to_unsigned_basetype(input_type2.width) : + to_signed_basetype(input_type2.width); + + vec1input = bitcast_expression(vec1_expected_type, vec1); + vec2input = bitcast_expression(vec2_expected_type, vec2); + } + + SPIRType::BaseType pre_saturate_type = + opcode != OpUDotAccSat ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); + + // Use the pre-saturate type for internal computation so add_sat arguments match. + SPIRType sat_type = type; + sat_type.basetype = pre_saturate_type; + auto sat_type_str = type_to_glsl(sat_type); + auto result_type_str = type_to_glsl(type); + + // Build dot product expression: sum of component-wise products + string dot_exp; + for (uint32_t i = 0; i < input_size; i++) + { + if (i > 0) + dot_exp += " + "; + string comp = input_size > 1 ? 
join(".s", i) : ""; + dot_exp += + join("(", sat_type_str, ")(", vec1input, comp, ") * (", sat_type_str, ")(", vec2input, comp, ")"); + } + + // Wrap with add_sat and cast to result type + string exp = + join("(", result_type_str, ")add_sat(", dot_exp, ", ", bitcast_expression(pre_saturate_type, acc), ")"); + + emit_op(result_type, id, exp, should_forward(vec1) && should_forward(vec2)); + inherit_expression_dependencies(id, vec1); + inherit_expression_dependencies(id, vec2); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index defe75032..a9991fa90 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -147,6 +147,8 @@ class CompilerOpenCL : public CompilerGLSL // Set when packHalf2x16/unpackHalf2x16 polyfill helpers are needed. bool needs_half_pack_polyfill = false; bool needs_half_unpack_polyfill = false; + // Set when bitfieldReverse polyfill is needed. + bool needs_bitreverse_polyfill = false; // Set when a default sampler is needed for combined image+sampler usage. 
bool needs_default_sampler = false; From cb78845e2d2438f04b03984ea7bd62914a47793c Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Sat, 14 Mar 2026 10:12:47 -0700 Subject: [PATCH 06/16] OpenCL: support for matrices and half types --- main.cpp | 16 +- .../comp/relaxed-block-layout.fp16.asm.comp} | 1 + .../comp/replicated-composites.spv16.asm.comp | 6 +- .../opt/shaders-opencl/comp/dowhile.comp | 11 +- .../opt/shaders-opencl/comp/inverse.comp | 53 + .../comp/mat3-row-maj-read-write-const.comp | 45 + reference/opt/shaders-opencl/comp/mat3.comp | 19 + .../shaders-opencl/comp/outer-product.comp | 127 +- .../opt/shaders-opencl/comp/rmw-matrix.comp | 41 + .../comp/shared-matrix-array-of-array.comp | 430 ++++ .../comp/shared-matrix-cast.comp | 131 ++ .../shared-matrix-nested-struct-array.comp | 463 ++++ .../comp/shared-matrix-nested-struct.comp | 583 +++++ ...50.double.comp => shared-std450.fp64.comp} | 0 .../shaders-opencl/comp/struct-layout.comp | 21 +- .../shaders-opencl/comp/struct-packing.comp | 125 ++ .../comp/relaxed-block-layout.fp16.asm.comp | 24 + .../comp/replicated-composites.spv16.asm.comp | 6 +- reference/shaders-opencl/comp/dowhile.comp | 25 +- reference/shaders-opencl/comp/inverse.comp | 53 + .../comp/mat3-row-maj-read-write-const.comp | 47 + reference/shaders-opencl/comp/mat3.comp | 20 + .../shaders-opencl/comp/outer-product.comp | 127 +- reference/shaders-opencl/comp/rmw-matrix.comp | 41 + ...alar-std450-distance-length-normalize.comp | 8 +- .../comp/shared-matrix-array-of-array.comp | 357 ++++ .../comp/shared-matrix-cast.comp | 174 ++ .../shared-matrix-nested-struct-array.comp | 401 ++++ .../comp/shared-matrix-nested-struct.comp | 598 ++++++ ...50.double.comp => shared-std450.fp64.comp} | 0 .../shaders-opencl/comp/struct-layout.comp | 23 +- .../shaders-opencl/comp/struct-packing.comp | 125 ++ .../comp/struct-packing.invalid.comp | 0 .../shaders-opencl/comp/torture-loop.comp | 11 +- ...omp => relaxed-block-layout.fp16.asm.comp} | 0 
shaders-opencl/comp/inverse.comp | 23 + .../comp/mat3-row-maj-read-write-const.comp | 17 + shaders-opencl/comp/mat3.comp | 14 + shaders-opencl/comp/rmw-matrix.comp | 20 + .../comp/shared-matrix-array-of-array.comp | 65 + shaders-opencl/comp/shared-matrix-cast.comp | 33 + .../shared-matrix-nested-struct-array.comp | 87 + .../comp/shared-matrix-nested-struct.comp | 141 ++ ...50.double.comp => shared-std450.fp64.comp} | 0 ...cking.invalid.comp => struct-packing.comp} | 0 spirv_cross_c.cpp | 13 +- spirv_cross_c.h | 11 +- spirv_glsl.cpp | 1899 +++++++++-------- spirv_glsl.hpp | 5 +- spirv_opencl.cpp | 1670 ++++++++++++++- spirv_opencl.hpp | 84 +- test_shaders.py | 30 +- 52 files changed, 7127 insertions(+), 1097 deletions(-) rename reference/{shaders-opencl/asm/comp/relaxed-block-layout.asm.comp => opt/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp} (94%) create mode 100644 reference/opt/shaders-opencl/comp/inverse.comp create mode 100644 reference/opt/shaders-opencl/comp/mat3-row-maj-read-write-const.comp create mode 100644 reference/opt/shaders-opencl/comp/mat3.comp create mode 100644 reference/opt/shaders-opencl/comp/rmw-matrix.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-matrix-array-of-array.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-matrix-cast.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-matrix-nested-struct-array.comp create mode 100644 reference/opt/shaders-opencl/comp/shared-matrix-nested-struct.comp rename reference/opt/shaders-opencl/comp/{shared-std450.double.comp => shared-std450.fp64.comp} (100%) create mode 100644 reference/opt/shaders-opencl/comp/struct-packing.comp create mode 100644 reference/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp create mode 100644 reference/shaders-opencl/comp/inverse.comp create mode 100644 reference/shaders-opencl/comp/mat3-row-maj-read-write-const.comp create mode 100644 reference/shaders-opencl/comp/mat3.comp create mode 100644 
reference/shaders-opencl/comp/rmw-matrix.comp create mode 100644 reference/shaders-opencl/comp/shared-matrix-array-of-array.comp create mode 100644 reference/shaders-opencl/comp/shared-matrix-cast.comp create mode 100644 reference/shaders-opencl/comp/shared-matrix-nested-struct-array.comp create mode 100644 reference/shaders-opencl/comp/shared-matrix-nested-struct.comp rename reference/shaders-opencl/comp/{shared-std450.double.comp => shared-std450.fp64.comp} (100%) create mode 100644 reference/shaders-opencl/comp/struct-packing.comp delete mode 100644 reference/shaders-opencl/comp/struct-packing.invalid.comp rename shaders-opencl/asm/comp/{relaxed-block-layout.asm.comp => relaxed-block-layout.fp16.asm.comp} (100%) create mode 100644 shaders-opencl/comp/inverse.comp create mode 100644 shaders-opencl/comp/mat3-row-maj-read-write-const.comp create mode 100644 shaders-opencl/comp/mat3.comp create mode 100644 shaders-opencl/comp/rmw-matrix.comp create mode 100644 shaders-opencl/comp/shared-matrix-array-of-array.comp create mode 100644 shaders-opencl/comp/shared-matrix-cast.comp create mode 100644 shaders-opencl/comp/shared-matrix-nested-struct-array.comp create mode 100644 shaders-opencl/comp/shared-matrix-nested-struct.comp rename shaders-opencl/comp/{shared-std450.double.comp => shared-std450.fp64.comp} (100%) rename shaders-opencl/comp/{struct-packing.invalid.comp => struct-packing.comp} (100%) diff --git a/main.cpp b/main.cpp index 2fc6ced5c..a53f5e758 100644 --- a/main.cpp +++ b/main.cpp @@ -778,10 +778,13 @@ struct CLIArguments bool opencl = false; uint32_t opencl_version = 120; + bool opencl_enable_fp16 = false; bool opencl_enable_fp64 = false; bool opencl_enable_64bit_atomics = false; bool opencl_enable_subgroups = false; - bool opencl_enable_shuffle = false; + bool opencl_enable_subgroups_all = false; + bool opencl_emulate_subgroups = false; + uint32_t opencl_fixed_subgroup_size = 0; }; static void print_version() @@ -1362,9 +1365,12 @@ static string 
compile_iteration(const CLIArguments &args, std::vector auto *ocl_comp = static_cast(compiler.get()); CompilerOpenCL::Options ocl_opts = ocl_comp->get_opencl_options(); ocl_opts.opencl_version = args.opencl_version; + ocl_opts.enable_fp16 = args.opencl_enable_fp16; ocl_opts.enable_fp64 = args.opencl_enable_fp64; ocl_opts.enable_subgroups = args.opencl_enable_subgroups; - ocl_opts.enable_shuffle = args.opencl_enable_shuffle; + ocl_opts.enable_subgroups_all = args.opencl_enable_subgroups_all; + ocl_opts.emulate_subgroups = args.opencl_emulate_subgroups; + ocl_opts.fixed_subgroup_size = args.opencl_fixed_subgroup_size; ocl_comp->set_opencl_options(ocl_opts); } else if (args.hlsl) @@ -1995,10 +2001,14 @@ static int main_inner(int argc, char *argv[]) }); cbs.add("--opencl", [&args](CLIParser &) { args.opencl = true; }); cbs.add("--opencl-version", [&args](CLIParser &parser) { args.opencl_version = parser.next_uint(); }); + cbs.add("--opencl-fp16", [&args](CLIParser &) { args.opencl_enable_fp16 = true; }); cbs.add("--opencl-fp64", [&args](CLIParser &) { args.opencl_enable_fp64 = true; }); cbs.add("--opencl-64bit-atomics", [&args](CLIParser &) { args.opencl_enable_64bit_atomics = true; }); cbs.add("--opencl-subgroups", [&args](CLIParser &) { args.opencl_enable_subgroups = true; }); - cbs.add("--opencl-shuffle", [&args](CLIParser &) { args.opencl_enable_shuffle = true; }); + cbs.add("--opencl-subgroups-all", [&args](CLIParser &) { args.opencl_enable_subgroups = true; args.opencl_enable_subgroups_all = true; }); + cbs.add("--opencl-emulate-subgroups", [&args](CLIParser &) { args.opencl_emulate_subgroups = true; }); + cbs.add("--opencl-fixed-subgroup-size", + [&args](CLIParser &parser) { args.opencl_fixed_subgroup_size = parser.next_uint(); }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) diff --git a/reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp
b/reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp similarity index 94% rename from reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp rename to reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp index ddae4bb54..a8926a145 100644 --- a/reference/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp +++ b/reference/opt/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp @@ -1,5 +1,6 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +#pragma OPENCL EXTENSION cl_khr_fp16 : enable struct foo { diff --git a/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp b/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp index 545ecf547..90501e9a1 100644 --- a/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp +++ b/reference/opt/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp @@ -1,6 +1,8 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + #ifndef SPIRV_CROSS_CONSTANT_ID_0 #define SPIRV_CROSS_CONSTANT_ID_0 0.0f #endif @@ -21,10 +23,10 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(UBO ubo) { float4 a_1 = (float4)(0.0f); - float4 b_1 = (float4)(1.0f); + spvMat4 b_1 = (spvMat4){ { (float4)(1.0f), (float4)(1.0f), (float4)(1.0f), (float4)(1.0f) } }; float4 c_1 = _20; float4 _36 = (float4)(ubo.uniform_float); float4 d_1 = _36; - float4 e_1 = _36; + spvMat4 e_1 = (spvMat4){ { _36, _36, _36, _36 } }; } diff --git a/reference/opt/shaders-opencl/comp/dowhile.comp b/reference/opt/shaders-opencl/comp/dowhile.comp index e5a51f6be..d858c8428 100644 --- a/reference/opt/shaders-opencl/comp/dowhile.comp +++ b/reference/opt/shaders-opencl/comp/dowhile.comp @@ -1,9 +1,11 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + struct SSBO { - float4 mvp; + spvMat4 mvp; float4 in_data[1]; }; @@ -16,6 
+18,11 @@ struct SSBO2 typedef struct SSBO2 SSBO2; +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global const SSBO* _28, __global float4* _52) { @@ -27,7 +34,7 @@ __kernel void comp_main(__global const SSBO* _28, __global float4* _52) float4 _42; for (;;) { - _42 = _28->mvp * _59; + _42 = spvMulMat4Vec4(_28->mvp, _59); int _44 = _60 + 1; if (_44 < 16) { diff --git a/reference/opt/shaders-opencl/comp/inverse.comp b/reference/opt/shaders-opencl/comp/inverse.comp new file mode 100644 index 000000000..3db4ff542 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/inverse.comp @@ -0,0 +1,53 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[3]; } spvMat3; +typedef struct { float4 columns[4]; } spvMat4; + +struct MatrixOut +{ + spvMat2 m2out; + spvMat3 m3out; + spvMat4 m4out; +}; + +typedef struct MatrixOut MatrixOut; + +struct MatrixIn +{ + spvMat2 m2in; + spvMat3 m3in; + spvMat4 m4in; +}; + +typedef struct MatrixIn MatrixIn; + +static spvMat2 spvInverse2(spvMat2 m) { + float d = 1.0f / (m.columns[0].x * m.columns[1].y - m.columns[1].x * m.columns[0].y); + return (spvMat2){ { (float2)(m.columns[1].y * d, -m.columns[0].y * d), (float2)(-m.columns[1].x * d, m.columns[0].x * d) } }; +} + +static spvMat3 spvInverse3(spvMat3 m) { + float3 t = (float3)(m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x); + float d = 1.0f / dot(m.columns[0], t); + return (spvMat3){ { t * d, (float3)(m.columns[0].z * m.columns[2].y - m.columns[0].y * m.columns[2].z, m.columns[0].x * m.columns[2].z - m.columns[0].z * m.columns[2].x, m.columns[0].y * m.columns[2].x - m.columns[0].x 
* m.columns[2].y) * d, (float3)(m.columns[0].y * m.columns[1].z - m.columns[0].z * m.columns[1].y, m.columns[0].z * m.columns[1].x - m.columns[0].x * m.columns[1].z, m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x) * d } }; +} + +static spvMat4 spvInverse4(spvMat4 m) { + float4 t = (float4)(m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, m.columns[2].x * m.columns[3].y * m.columns[1].w - m.columns[3].x * m.columns[2].y * m.columns[1].w + m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, m.columns[3].x * m.columns[2].y * m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z); + spvMat4 r = (spvMat4){ { (float4)(t.x, m.columns[3].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[0].z * m.columns[2].w + m.columns[0].y * m.columns[3].z * m.columns[2].w + m.columns[2].y * m.columns[0].z * m.columns[3].w - m.columns[0].y * m.columns[2].z * m.columns[3].w, m.columns[1].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[1].z * m.columns[0].w + m.columns[3].y * 
m.columns[0].z * m.columns[1].w - m.columns[0].y * m.columns[3].z * m.columns[1].w - m.columns[1].y * m.columns[0].z * m.columns[3].w + m.columns[0].y * m.columns[1].z * m.columns[3].w, m.columns[2].y * m.columns[1].z * m.columns[0].w - m.columns[1].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[0].z * m.columns[1].w + m.columns[0].y * m.columns[2].z * m.columns[1].w + m.columns[1].y * m.columns[0].z * m.columns[2].w - m.columns[0].y * m.columns[1].z * m.columns[2].w), (float4)(t.y, m.columns[2].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[2].z * m.columns[0].w + m.columns[3].x * m.columns[0].z * m.columns[2].w - m.columns[0].x * m.columns[3].z * m.columns[2].w - m.columns[2].x * m.columns[0].z * m.columns[3].w + m.columns[0].x * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[1].z * m.columns[0].w - m.columns[1].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[0].z * m.columns[1].w + m.columns[0].x * m.columns[3].z * m.columns[1].w + m.columns[1].x * m.columns[0].z * m.columns[3].w - m.columns[0].x * m.columns[1].z * m.columns[3].w, m.columns[1].x * m.columns[2].z * m.columns[0].w - m.columns[2].x * m.columns[1].z * m.columns[0].w + m.columns[2].x * m.columns[0].z * m.columns[1].w - m.columns[0].x * m.columns[2].z * m.columns[1].w - m.columns[1].x * m.columns[0].z * m.columns[2].w + m.columns[0].x * m.columns[1].z * m.columns[2].w), (float4)(t.z, m.columns[3].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[0].y * m.columns[2].w + m.columns[0].x * m.columns[3].y * m.columns[2].w + m.columns[2].x * m.columns[0].y * m.columns[3].w - m.columns[0].x * m.columns[2].y * m.columns[3].w, m.columns[1].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[1].y * m.columns[0].w + m.columns[3].x * m.columns[0].y * m.columns[1].w - m.columns[0].x * m.columns[3].y * m.columns[1].w - m.columns[1].x * m.columns[0].y * m.columns[3].w + 
m.columns[0].x * m.columns[1].y * m.columns[3].w, m.columns[2].x * m.columns[1].y * m.columns[0].w - m.columns[1].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[0].y * m.columns[1].w + m.columns[0].x * m.columns[2].y * m.columns[1].w + m.columns[1].x * m.columns[0].y * m.columns[2].w - m.columns[0].x * m.columns[1].y * m.columns[2].w), (float4)(t.w, m.columns[2].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[2].y * m.columns[0].z + m.columns[3].x * m.columns[0].y * m.columns[2].z - m.columns[0].x * m.columns[3].y * m.columns[2].z - m.columns[2].x * m.columns[0].y * m.columns[3].z + m.columns[0].x * m.columns[2].y * m.columns[3].z, m.columns[3].x * m.columns[1].y * m.columns[0].z - m.columns[1].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[0].y * m.columns[1].z + m.columns[0].x * m.columns[3].y * m.columns[1].z + m.columns[1].x * m.columns[0].y * m.columns[3].z - m.columns[0].x * m.columns[1].y * m.columns[3].z, m.columns[1].x * m.columns[2].y * m.columns[0].z - m.columns[2].x * m.columns[1].y * m.columns[0].z + m.columns[2].x * m.columns[0].y * m.columns[1].z - m.columns[0].x * m.columns[2].y * m.columns[1].z - m.columns[1].x * m.columns[0].y * m.columns[2].z + m.columns[0].x * m.columns[1].y * m.columns[2].z) } }; + float d = 1.0f / dot(m.columns[0], t); + r.columns[0] *= d; r.columns[1] *= d; r.columns[2] *= d; r.columns[3] *= d; + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global MatrixOut* _15, __global const MatrixIn* _20) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _15->m2out = spvInverse2(_20->m2in); + _15->m3out = spvInverse3(_20->m3in); + _15->m4out = spvInverse4(_20->m4in); +} + diff --git a/reference/opt/shaders-opencl/comp/mat3-row-maj-read-write-const.comp b/reference/opt/shaders-opencl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 000000000..a1b4522fa --- /dev/null +++ 
b/reference/opt/shaders-opencl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,45 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[3]; } spvMat3; + +struct model_t +{ + spvMat3 mtx_rm; +}; + +typedef struct model_t model_t; + +static float3 spvMulMat3Vec3(spvMat3 m, float3 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z; +} + +static spvMat3 spvMulMat3Mat3(spvMat3 a, spvMat3 b) +{ + spvMat3 r; + r.columns[0] = spvMulMat3Vec3(a, b.columns[0]); + r.columns[1] = spvMulMat3Vec3(a, b.columns[1]); + r.columns[2] = spvMulMat3Vec3(a, b.columns[2]); + return r; +} + +static spvMat3 spvTransposeMat3(spvMat3 m) +{ + spvMat3 r; + r.columns[0] = (float3)(m.columns[0].x, m.columns[1].x, m.columns[2].x); + r.columns[1] = (float3)(m.columns[0].y, m.columns[1].y, m.columns[2].y); + r.columns[2] = (float3)(m.columns[0].z, m.columns[1].z, m.columns[2].z); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global spvMat3* model) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + if (spvMulMat3Mat3(spvTransposeMat3(model[0]), (spvMat3){ { (float3)(4.0f, -3.0f, 1.0f), (float3)(-7.0f, 7.0f, -7.0f), (float3)(-5.0f, 6.0f, -8.0f) } }).columns[0].x != 0.0f) + { + model[0] = spvTransposeMat3((spvMat3){ { (float3)(-5.0f, -3.0f, -5.0f), (float3)(-2.0f, 2.0f, -5.0f), (float3)(6.0f, 3.0f, -8.0f) } }); + } +} + diff --git a/reference/opt/shaders-opencl/comp/mat3.comp b/reference/opt/shaders-opencl/comp/mat3.comp new file mode 100644 index 000000000..bc825c561 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/mat3.comp @@ -0,0 +1,19 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[3]; } spvMat3; + +struct SSBO2 +{ + spvMat3 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global spvMat3* _22) +{ + uint3 
spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _22[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = (spvMat3){ { (float3)(10.0f), (float3)(20.0f), (float3)(40.0f) } }; +} + diff --git a/reference/opt/shaders-opencl/comp/outer-product.comp b/reference/opt/shaders-opencl/comp/outer-product.comp index 4462fc221..b607c18c3 100644 --- a/reference/opt/shaders-opencl/comp/outer-product.comp +++ b/reference/opt/shaders-opencl/comp/outer-product.comp @@ -1,17 +1,27 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float4 columns[2]; } spvMat2x4; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float3 columns[3]; } spvMat3; +typedef struct { float4 columns[3]; } spvMat3x4; +typedef struct { float2 columns[4]; } spvMat4x2; +typedef struct { float3 columns[4]; } spvMat4x3; +typedef struct { float4 columns[4]; } spvMat4; + struct SSBO { - float2 m22; - float3 m23; - float4 m24; - float2 m32; - float3 m33; - float4 m34; - float2 m42; - float3 m43; - float4 m44; + spvMat2 m22; + spvMat2x3 m23; + spvMat2x4 m24; + spvMat3x2 m32; + spvMat3 m33; + spvMat3x4 m34; + spvMat4x2 m42; + spvMat4x3 m43; + spvMat4 m44; }; typedef struct SSBO SSBO; @@ -25,21 +35,102 @@ struct ReadSSBO typedef struct ReadSSBO ReadSSBO; +static spvMat2 spvOuterProductVec2Vec2(float2 c, float2 r) +{ + spvMat2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + return m; +} + +static spvMat2x3 spvOuterProductVec3Vec2(float3 c, float2 r) +{ + spvMat2x3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + return m; +} + +static spvMat2x4 spvOuterProductVec4Vec2(float4 c, float2 r) +{ + spvMat2x4 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + return m; +} + +static spvMat3x2 spvOuterProductVec2Vec3(float2 c, float3 r) +{ + spvMat3x2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + 
m.columns[2] = c * r.z; + return m; +} + +static spvMat3 spvOuterProductVec3Vec3(float3 c, float3 r) +{ + spvMat3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + return m; +} + +static spvMat3x4 spvOuterProductVec4Vec3(float4 c, float3 r) +{ + spvMat3x4 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + return m; +} + +static spvMat4x2 spvOuterProductVec2Vec4(float2 c, float4 r) +{ + spvMat4x2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + +static spvMat4x3 spvOuterProductVec3Vec4(float3 c, float4 r) +{ + spvMat4x3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + +static spvMat4 spvOuterProductVec4Vec4(float4 c, float4 r) +{ + spvMat4 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global SSBO* _21, __global const ReadSSBO* _26) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); float2 _29 = _26->v2; - _21->m22 = _29 * _29.x; + _21->m22 = spvOuterProductVec2Vec2(_29, _29); float3 _38 = _26->v3; - _21->m23 = _38 * _29.x; + _21->m23 = spvOuterProductVec3Vec2(_38, _29); float4 _47 = _26->v4; - _21->m24 = _47 * _29.x; - _21->m32 = _29 * _38.x; - _21->m33 = _38 * _38.x; - _21->m34 = _47 * _38.x; - _21->m42 = _29 * _47.x; - _21->m43 = _38 * _47.x; - _21->m44 = _47 * _47.x; + _21->m24 = spvOuterProductVec4Vec2(_47, _29); + _21->m32 = spvOuterProductVec2Vec3(_29, _38); + _21->m33 = spvOuterProductVec3Vec3(_38, _38); + _21->m34 = spvOuterProductVec4Vec3(_47, _38); + _21->m42 = spvOuterProductVec2Vec4(_29, _47); + _21->m43 = spvOuterProductVec3Vec4(_38, _47); + _21->m44 = spvOuterProductVec4Vec4(_47, _47); } diff --git a/reference/opt/shaders-opencl/comp/rmw-matrix.comp 
b/reference/opt/shaders-opencl/comp/rmw-matrix.comp new file mode 100644 index 000000000..9fdc47c62 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/rmw-matrix.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[4]; } spvMat4; + +struct SSBO +{ + float a; + float4 b; + spvMat4 c; + float a1; + float4 b1; + spvMat4 c1; +}; + +typedef struct SSBO SSBO; + +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat4 spvMulMat4Mat4(spvMat4 a, spvMat4 b) +{ + spvMat4 r; + r.columns[0] = spvMulMat4Vec4(a, b.columns[0]); + r.columns[1] = spvMulMat4Vec4(a, b.columns[1]); + r.columns[2] = spvMulMat4Vec4(a, b.columns[2]); + r.columns[3] = spvMulMat4Vec4(a, b.columns[3]); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _11) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _11->a *= _11->a1; + _11->b *= _11->b1; + _11->c = spvMulMat4Mat4(_11->c, _11->c1); +} + diff --git a/reference/opt/shaders-opencl/comp/shared-matrix-array-of-array.comp b/reference/opt/shaders-opencl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 000000000..63af2dc47 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,430 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[4]; } spvMat4x3; + +struct S1 +{ + spvMat4x3 a[2]; + float b; + float2 c[3]; +}; + +typedef struct S1 S1; + +struct S2 +{ + int4 a; + bool b[3][1][3]; +}; + +typedef struct S2 S2; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _383) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; 
+ __local S2 s2; + s1.a[0] = (spvMat4x3){ { (float3)(0.0f, 2.0f, -8.0f), (float3)(6.0f, 7.0f, 5.0f), (float3)(-6.0f, 1.0f, 9.0f), (float3)(-4.0f, -3.0f, 4.0f) } }; + s1.a[1] = (spvMat4x3){ { (float3)(4.0f, 9.0f, -9.0f), (float3)(-8.0f, -9.0f, 8.0f), (float3)(0.0f, 4.0f, -4.0f), (float3)(7.0f, 2.0f, -1.0f) } }; + s1.b = 7.0f; + s1.c[0] = (float2)(-5.0f, -4.0f); + s1.c[1] = (float2)(3.0f, -5.0f); + s1.c[2] = (float2)(-3.0f, -1.0f); + s2.a = (int4)(1, 0, -3, 1); + s2.b[0][0][0] = true; + s2.b[0][0][1] = false; + s2.b[0][0][2] = false; + s2.b[1][0][0] = true; + s2.b[1][0][1] = false; + s2.b[1][0][2] = true; + s2.b[2][0][0] = false; + s2.b[2][0][1] = true; + s2.b[2][0][2] = true; + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool _464 = fabs(-s1.a[0].columns[0].x) < 0.0500000007450580596923828125f; + bool _449; + if (_464) + { + _449 = fabs(2.0f - s1.a[0].columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _449 = _464; + } + bool _457; + if (_449) + { + _457 = fabs((-8.0f) - s1.a[0].columns[0].z) < 0.0500000007450580596923828125f; + } + else + { + _457 = _449; + } + bool _412; + if (_457) + { + bool _514 = fabs(6.0f - s1.a[0].columns[1].x) < 0.0500000007450580596923828125f; + bool _499; + if (_514) + { + _499 = fabs(7.0f - s1.a[0].columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _499 = _514; + } + bool _507; + if (_499) + { + _507 = fabs(5.0f - s1.a[0].columns[1].z) < 0.0500000007450580596923828125f; + } + else + { + _507 = _499; + } + _412 = _507; + } + else + { + _412 = _457; + } + bool _420; + if (_412) + { + bool _564 = fabs((-6.0f) - s1.a[0].columns[2].x) < 0.0500000007450580596923828125f; + bool _549; + if (_564) + { + _549 = fabs(1.0f - s1.a[0].columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _549 = _564; + } + bool _557; + if (_549) + { + _557 = fabs(9.0f - s1.a[0].columns[2].z) < 0.0500000007450580596923828125f; + } + else + { + _557 = _549; + } + _420 = _557; + } + 
else + { + _420 = _412; + } + bool _428; + if (_420) + { + bool _614 = fabs((-4.0f) - s1.a[0].columns[3].x) < 0.0500000007450580596923828125f; + bool _599; + if (_614) + { + _599 = fabs((-3.0f) - s1.a[0].columns[3].y) < 0.0500000007450580596923828125f; + } + else + { + _599 = _614; + } + bool _607; + if (_599) + { + _607 = fabs(4.0f - s1.a[0].columns[3].z) < 0.0500000007450580596923828125f; + } + else + { + _607 = _599; + } + _428 = _607; + } + else + { + _428 = _420; + } + bool _251; + if (_428) + { + bool _703 = fabs(4.0f - s1.a[1].columns[0].x) < 0.0500000007450580596923828125f; + bool _688; + if (_703) + { + _688 = fabs(9.0f - s1.a[1].columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _688 = _703; + } + bool _696; + if (_688) + { + _696 = fabs((-9.0f) - s1.a[1].columns[0].z) < 0.0500000007450580596923828125f; + } + else + { + _696 = _688; + } + bool _651; + if (_696) + { + bool _753 = fabs((-8.0f) - s1.a[1].columns[1].x) < 0.0500000007450580596923828125f; + bool _738; + if (_753) + { + _738 = fabs((-9.0f) - s1.a[1].columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _738 = _753; + } + bool _746; + if (_738) + { + _746 = fabs(8.0f - s1.a[1].columns[1].z) < 0.0500000007450580596923828125f; + } + else + { + _746 = _738; + } + _651 = _746; + } + else + { + _651 = _696; + } + bool _659; + if (_651) + { + bool _803 = fabs(-s1.a[1].columns[2].x) < 0.0500000007450580596923828125f; + bool _788; + if (_803) + { + _788 = fabs(4.0f - s1.a[1].columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _788 = _803; + } + bool _796; + if (_788) + { + _796 = fabs((-4.0f) - s1.a[1].columns[2].z) < 0.0500000007450580596923828125f; + } + else + { + _796 = _788; + } + _659 = _796; + } + else + { + _659 = _651; + } + bool _667; + if (_659) + { + bool _853 = fabs(7.0f - s1.a[1].columns[3].x) < 0.0500000007450580596923828125f; + bool _838; + if (_853) + { + _838 = fabs(2.0f - s1.a[1].columns[3].y) < 0.0500000007450580596923828125f; + } + else + { + 
_838 = _853; + } + bool _846; + if (_838) + { + _846 = fabs((-1.0f) - s1.a[1].columns[3].z) < 0.0500000007450580596923828125f; + } + else + { + _846 = _838; + } + _667 = _846; + } + else + { + _667 = _659; + } + _251 = _667; + } + else + { + _251 = _428; + } + bool _260; + if (_251) + { + _260 = fabs(7.0f - s1.b) < 0.0500000007450580596923828125f; + } + else + { + _260 = _251; + } + bool _269; + if (_260) + { + bool _900 = fabs((-5.0f) - s1.c[0].x) < 0.0500000007450580596923828125f; + bool _893; + if (_900) + { + _893 = fabs((-4.0f) - s1.c[0].y) < 0.0500000007450580596923828125f; + } + else + { + _893 = _900; + } + _269 = _893; + } + else + { + _269 = _260; + } + bool _278; + if (_269) + { + bool _933 = fabs(3.0f - s1.c[1].x) < 0.0500000007450580596923828125f; + bool _926; + if (_933) + { + _926 = fabs((-5.0f) - s1.c[1].y) < 0.0500000007450580596923828125f; + } + else + { + _926 = _933; + } + _278 = _926; + } + else + { + _278 = _269; + } + bool _287; + if (_278) + { + bool _966 = fabs((-3.0f) - s1.c[2].x) < 0.0500000007450580596923828125f; + bool _959; + if (_966) + { + _959 = fabs((-1.0f) - s1.c[2].y) < 0.0500000007450580596923828125f; + } + else + { + _959 = _966; + } + _287 = _959; + } + else + { + _287 = _278; + } + bool _296; + if (_287) + { + _296 = all((int4)(1, 0, -3, 1) == s2.a); + } + else + { + _296 = _287; + } + bool _305; + if (_296) + { + _305 = true == s2.b[0][0][0]; + } + else + { + _305 = _296; + } + bool _314; + if (_305) + { + _314 = false == s2.b[0][0][1]; + } + else + { + _314 = _305; + } + bool _323; + if (_314) + { + _323 = false == s2.b[0][0][2]; + } + else + { + _323 = _314; + } + bool _332; + if (_323) + { + _332 = true == s2.b[1][0][0]; + } + else + { + _332 = _323; + } + bool _341; + if (_332) + { + _341 = false == s2.b[1][0][1]; + } + else + { + _341 = _332; + } + bool _350; + if (_341) + { + _350 = true == s2.b[1][0][2]; + } + else + { + _350 = _341; + } + bool _359; + if (_350) + { + _359 = false == s2.b[2][0][0]; + } + else + { + 
_359 = _350; + } + bool _368; + if (_359) + { + _368 = true == s2.b[2][0][1]; + } + else + { + _368 = _359; + } + bool _377; + if (_368) + { + _377 = true == s2.b[2][0][2]; + } + else + { + _377 = _368; + } + if (_377) + { + _383[0] += as_uint(1); + } +} + diff --git a/reference/opt/shaders-opencl/comp/shared-matrix-cast.comp b/reference/opt/shaders-opencl/comp/shared-matrix-cast.comp new file mode 100644 index 000000000..6734de200 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-matrix-cast.comp @@ -0,0 +1,131 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[3]; } spvMat3x2; + +struct S1 +{ + float4 a; + spvMat3x2 b; + int4 c; +}; + +typedef struct S1 S1; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _212) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a = (float4)(1.0f, -5.0f, -9.0f, -5.0f); + s1.b = (spvMat3x2){ { (float2)(1.0f, -7.0f), (float2)(1.0f, 2.0f), (float2)(8.0f, 7.0f) } }; + s1.c = (int4)(false, true, false, false); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool _264 = fabs(1.0f - s1.a.x) < 0.0500000007450580596923828125f; + bool _241; + if (_264) + { + _241 = fabs((-5.0f) - s1.a.y) < 0.0500000007450580596923828125f; + } + else + { + _241 = _264; + } + bool _249; + if (_241) + { + _249 = fabs((-9.0f) - s1.a.z) < 0.0500000007450580596923828125f; + } + else + { + _249 = _241; + } + bool _257; + if (_249) + { + _257 = fabs((-5.0f) - s1.a.w) < 0.0500000007450580596923828125f; + } + else + { + _257 = _249; + } + bool _197; + if (_257) + { + bool _340 = fabs(1.0f - s1.b.columns[0].x) < 0.0500000007450580596923828125f; + bool _333; + if (_340) + { + _333 = fabs((-7.0f) - s1.b.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _333 = _340; + } + bool _306; 
+ if (_333) + { + bool _373 = fabs(1.0f - s1.b.columns[1].x) < 0.0500000007450580596923828125f; + bool _366; + if (_373) + { + _366 = fabs(2.0f - s1.b.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _366 = _373; + } + _306 = _366; + } + else + { + _306 = _333; + } + bool _314; + if (_306) + { + bool _406 = fabs(8.0f - s1.b.columns[2].x) < 0.0500000007450580596923828125f; + bool _399; + if (_406) + { + _399 = fabs(7.0f - s1.b.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _399 = _406; + } + _314 = _399; + } + else + { + _314 = _306; + } + _197 = _314; + } + else + { + _197 = _257; + } + bool _206; + if (_197) + { + _206 = all((int4)(false, true, false, false) == s1.c); + } + else + { + _206 = _197; + } + if (_206) + { + _212[0] += as_uint(1); + } +} + diff --git a/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct-array.comp b/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 000000000..33748669a --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,463 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float3 columns[4]; } spvMat4x3; + +struct sA +{ + spvMat2x3 mA; +}; + +typedef struct sA sA; + +struct sB +{ + spvMat2 mA; + spvMat3x2 mB; + uint3 mC; +}; + +typedef struct sB sB; + +struct sC +{ + sA mA; + sB mB; +}; + +typedef struct sC sC; + +struct sD +{ + sC mA; +}; + +typedef struct sD sD; + +struct sE +{ + spvMat3x2 mA; + spvMat4x3 mB; +}; + +typedef struct sE sE; + +struct sF +{ + sE mA; +}; + +typedef struct sF sF; + +struct sG +{ + sF mA; +}; + +typedef struct sG sG; + +struct sH +{ + int3 mA[2]; +}; + +typedef struct sH sH; + +struct S1 +{ + sD a; + sG b; + sH c[2]; +}; + +typedef struct S1 S1; + +struct block +{ + uint passed; +}; + 
+typedef struct block block; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _424) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a.mA.mA.mA = (spvMat2x3){ { (float3)(6.0f, 8.0f, 8.0f), (float3)(0.0f, -4.0f, -5.0f) } }; + s1.a.mA.mB.mA = (spvMat2){ { (float2)(9.0f, -4.0f), (float2)(-6.0f, -1.0f) } }; + s1.a.mA.mB.mB = (spvMat3x2){ { (float2)(-1.0f, -2.0f), (float2)(1.0f, 6.0f), (float2)(5.0f, 7.0f) } }; + s1.a.mA.mB.mC = (uint3)(3u, 1u, 5u); + s1.b.mA.mA.mA = (spvMat3x2){ { (float2)(8.0f, 3.0f), (float2)(0.0f, 2.0f), (float2)(1.0f, 8.0f) } }; + s1.b.mA.mA.mB = (spvMat4x3){ { (float3)(0.0f, 9.0f, -1.0f), (float3)(-1.0f, -7.0f, 7.0f), (float3)(-4.0f, -3.0f, 1.0f), (float3)(-4.0f, -9.0f, 1.0f) } }; + s1.c[0].mA[0] = (int3)(true, false, false); + s1.c[0].mA[1] = (int3)(true, false, false); + s1.c[1].mA[0] = (int3)(false); + s1.c[1].mA[1] = (int3)(false); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool _484 = fabs(6.0f - s1.a.mA.mA.mA.columns[0].x) < 0.0500000007450580596923828125f; + bool _469; + if (_484) + { + _469 = fabs(8.0f - s1.a.mA.mA.mA.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _469 = _484; + } + bool _477; + if (_469) + { + _477 = fabs(8.0f - s1.a.mA.mA.mA.columns[0].z) < 0.0500000007450580596923828125f; + } + else + { + _477 = _469; + } + bool _448; + if (_477) + { + bool _534 = fabs(-s1.a.mA.mA.mA.columns[1].x) < 0.0500000007450580596923828125f; + bool _519; + if (_534) + { + _519 = fabs((-4.0f) - s1.a.mA.mA.mA.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _519 = _534; + } + bool _527; + if (_519) + { + _527 = fabs((-5.0f) - s1.a.mA.mA.mA.columns[1].z) < 0.0500000007450580596923828125f; + } + else + { + _527 = _519; + } + _448 = _527; + } + else + { + _448 = _477; + } + bool _346; + if (_448) + { + bool _593 = fabs(9.0f - s1.a.mA.mB.mA.columns[0].x) < 
0.0500000007450580596923828125f; + bool _586; + if (_593) + { + _586 = fabs((-4.0f) - s1.a.mA.mB.mA.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _586 = _593; + } + bool _567; + if (_586) + { + bool _626 = fabs((-6.0f) - s1.a.mA.mB.mA.columns[1].x) < 0.0500000007450580596923828125f; + bool _619; + if (_626) + { + _619 = fabs((-1.0f) - s1.a.mA.mB.mA.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _619 = _626; + } + _567 = _619; + } + else + { + _567 = _586; + } + _346 = _567; + } + else + { + _346 = _448; + } + bool _355; + if (_346) + { + bool _688 = fabs((-1.0f) - s1.a.mA.mB.mB.columns[0].x) < 0.0500000007450580596923828125f; + bool _681; + if (_688) + { + _681 = fabs((-2.0f) - s1.a.mA.mB.mB.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _681 = _688; + } + bool _654; + if (_681) + { + bool _721 = fabs(1.0f - s1.a.mA.mB.mB.columns[1].x) < 0.0500000007450580596923828125f; + bool _714; + if (_721) + { + _714 = fabs(6.0f - s1.a.mA.mB.mB.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _714 = _721; + } + _654 = _714; + } + else + { + _654 = _681; + } + bool _662; + if (_654) + { + bool _754 = fabs(5.0f - s1.a.mA.mB.mB.columns[2].x) < 0.0500000007450580596923828125f; + bool _747; + if (_754) + { + _747 = fabs(7.0f - s1.a.mA.mB.mB.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _747 = _754; + } + _662 = _747; + } + else + { + _662 = _654; + } + _355 = _662; + } + else + { + _355 = _346; + } + bool _364; + if (_355) + { + _364 = all((uint3)(3u, 1u, 5u) == s1.a.mA.mB.mC); + } + else + { + _364 = _355; + } + bool _373; + if (_364) + { + bool _822 = fabs(8.0f - s1.b.mA.mA.mA.columns[0].x) < 0.0500000007450580596923828125f; + bool _815; + if (_822) + { + _815 = fabs(3.0f - s1.b.mA.mA.mA.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _815 = _822; + } + bool _788; + if (_815) + { + bool _855 = fabs(-s1.b.mA.mA.mA.columns[1].x) < 0.0500000007450580596923828125f; + bool 
_848; + if (_855) + { + _848 = fabs(2.0f - s1.b.mA.mA.mA.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _848 = _855; + } + _788 = _848; + } + else + { + _788 = _815; + } + bool _796; + if (_788) + { + bool _888 = fabs(1.0f - s1.b.mA.mA.mA.columns[2].x) < 0.0500000007450580596923828125f; + bool _881; + if (_888) + { + _881 = fabs(8.0f - s1.b.mA.mA.mA.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _881 = _888; + } + _796 = _881; + } + else + { + _796 = _788; + } + _373 = _796; + } + else + { + _373 = _364; + } + bool _382; + if (_373) + { + bool _970 = fabs(-s1.b.mA.mA.mB.columns[0].x) < 0.0500000007450580596923828125f; + bool _955; + if (_970) + { + _955 = fabs(9.0f - s1.b.mA.mA.mB.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _955 = _970; + } + bool _963; + if (_955) + { + _963 = fabs((-1.0f) - s1.b.mA.mA.mB.columns[0].z) < 0.0500000007450580596923828125f; + } + else + { + _963 = _955; + } + bool _918; + if (_963) + { + bool _1020 = fabs((-1.0f) - s1.b.mA.mA.mB.columns[1].x) < 0.0500000007450580596923828125f; + bool _1005; + if (_1020) + { + _1005 = fabs((-7.0f) - s1.b.mA.mA.mB.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _1005 = _1020; + } + bool _1013; + if (_1005) + { + _1013 = fabs(7.0f - s1.b.mA.mA.mB.columns[1].z) < 0.0500000007450580596923828125f; + } + else + { + _1013 = _1005; + } + _918 = _1013; + } + else + { + _918 = _963; + } + bool _926; + if (_918) + { + bool _1070 = fabs((-4.0f) - s1.b.mA.mA.mB.columns[2].x) < 0.0500000007450580596923828125f; + bool _1055; + if (_1070) + { + _1055 = fabs((-3.0f) - s1.b.mA.mA.mB.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _1055 = _1070; + } + bool _1063; + if (_1055) + { + _1063 = fabs(1.0f - s1.b.mA.mA.mB.columns[2].z) < 0.0500000007450580596923828125f; + } + else + { + _1063 = _1055; + } + _926 = _1063; + } + else + { + _926 = _918; + } + bool _934; + if (_926) + { + bool _1120 = fabs((-4.0f) - 
s1.b.mA.mA.mB.columns[3].x) < 0.0500000007450580596923828125f; + bool _1105; + if (_1120) + { + _1105 = fabs((-9.0f) - s1.b.mA.mA.mB.columns[3].y) < 0.0500000007450580596923828125f; + } + else + { + _1105 = _1120; + } + bool _1113; + if (_1105) + { + _1113 = fabs(1.0f - s1.b.mA.mA.mB.columns[3].z) < 0.0500000007450580596923828125f; + } + else + { + _1113 = _1105; + } + _934 = _1113; + } + else + { + _934 = _926; + } + _382 = _934; + } + else + { + _382 = _373; + } + bool _391; + if (_382) + { + _391 = all((int3)(true, false, false) == s1.c[0].mA[0]); + } + else + { + _391 = _382; + } + bool _400; + if (_391) + { + _400 = all((int3)(true, false, false) == s1.c[0].mA[1]); + } + else + { + _400 = _391; + } + bool _409; + if (_400) + { + _409 = all((int3)(false) == s1.c[1].mA[0]); + } + else + { + _409 = _400; + } + bool _418; + if (_409) + { + _418 = all((int3)(false) == s1.c[1].mA[1]); + } + else + { + _418 = _409; + } + if (_418) + { + _424[0] += as_uint(1); + } +} + diff --git a/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct.comp b/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 000000000..5440da2f1 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,583 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float4 columns[4]; } spvMat4; + +struct S1 +{ + uint a; + float4 b; +}; + +typedef struct S1 S1; + +struct sA +{ + spvMat4 mA; + int3 mB; + int4 mC; +}; + +typedef struct sA sA; + +struct sB +{ + int2 mA; +}; + +typedef struct sB sB; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +typedef struct sC sC; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +typedef struct sD sD; + +struct sE +{ + sD mA; +}; + +typedef struct sE sE; + +struct sF +{ + uint3 mA; + bool mB; +}; + +typedef struct sF sF; + +struct sG +{ + sF mA; + 
spvMat3x2 mB; +}; + +typedef struct sG sG; + +struct sH +{ + sG mA; + float2 mB; +}; + +typedef struct sH sH; + +struct sI +{ + spvMat2 mA; + int3 mB; + int4 mC; +}; + +typedef struct sI sI; + +struct sJ +{ + sI mA; + int3 mB; +}; + +typedef struct sJ sJ; + +struct sK +{ + int2 mA; + sJ mB; + int2 mC; +}; + +typedef struct sK sK; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +typedef struct S2 S2; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _612) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + __local S2 s2; + s1.a = 0u; + s1.b = (float4)(8.0f, 8.0f, 0.0f, -4.0f); + s2.a.mA.mA.mA = (spvMat4){ { (float4)(-5.0f, 9.0f, -4.0f, -6.0f), (float4)(-1.0f, -1.0f, -2.0f, 1.0f), (float4)(6.0f, 5.0f, 7.0f, -2.0f), (float4)(-4.0f, -9.0f, 8.0f, 3.0f) } }; + s2.a.mA.mA.mB = (int3)(true, false, false); + s2.a.mA.mA.mC = (int4)(true, true, true, false); + s2.a.mA.mB.mA = (int2)(true); + s2.a.mA.mC.mA = 7.0f; + s2.a.mA.mC.mB = (uint4)(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0f; + s2.b = (int3)(1, -4, 0); + s2.c.mA.mA.mA = (uint3)(4u, 9u, 1u); + s2.c.mA.mA.mB = false; + s2.c.mA.mB = (spvMat3x2){ { (float2)(3.0f, -5.0f), (float2)(-1.0f, -5.0f), (float2)(-1.0f, -9.0f) } }; + s2.c.mB = (float2)(-6.0f, -9.0f); + s2.d.mA = (int2)(true, false); + s2.d.mB.mA.mA = (spvMat2){ { (float2)(-2.0f, 3.0f), (float2)(7.0f, 2.0f) } }; + s2.d.mB.mA.mB = (int3)(false); + s2.d.mB.mA.mC = (int4)(false, false, false, true); + s2.d.mB.mB = (int3)(true, false, false); + s2.d.mC = (int2)(-9, 0); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool _622 = 0u == s1.a; + bool _444; + if (_622) + { + bool _668 = fabs(8.0f - s1.b.x) < 0.0500000007450580596923828125f; + bool _645; + if (_668) + { + _645 = fabs(8.0f - s1.b.y) < 0.0500000007450580596923828125f; + } + else + { + _645 = 
_668; + } + bool _653; + if (_645) + { + _653 = fabs(-s1.b.z) < 0.0500000007450580596923828125f; + } + else + { + _653 = _645; + } + bool _661; + if (_653) + { + _661 = fabs((-4.0f) - s1.b.w) < 0.0500000007450580596923828125f; + } + else + { + _661 = _653; + } + _444 = _661; + } + else + { + _444 = _622; + } + bool _453; + if (_444) + { + bool _774 = fabs((-5.0f) - s2.a.mA.mA.mA.columns[0].x) < 0.0500000007450580596923828125f; + bool _751; + if (_774) + { + _751 = fabs(9.0f - s2.a.mA.mA.mA.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _751 = _774; + } + bool _759; + if (_751) + { + _759 = fabs((-4.0f) - s2.a.mA.mA.mA.columns[0].z) < 0.0500000007450580596923828125f; + } + else + { + _759 = _751; + } + bool _767; + if (_759) + { + _767 = fabs((-6.0f) - s2.a.mA.mA.mA.columns[0].w) < 0.0500000007450580596923828125f; + } + else + { + _767 = _759; + } + bool _712; + if (_767) + { + bool _841 = fabs((-1.0f) - s2.a.mA.mA.mA.columns[1].x) < 0.0500000007450580596923828125f; + bool _818; + if (_841) + { + _818 = fabs((-1.0f) - s2.a.mA.mA.mA.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _818 = _841; + } + bool _826; + if (_818) + { + _826 = fabs((-2.0f) - s2.a.mA.mA.mA.columns[1].z) < 0.0500000007450580596923828125f; + } + else + { + _826 = _818; + } + bool _834; + if (_826) + { + _834 = fabs(1.0f - s2.a.mA.mA.mA.columns[1].w) < 0.0500000007450580596923828125f; + } + else + { + _834 = _826; + } + _712 = _834; + } + else + { + _712 = _767; + } + bool _720; + if (_712) + { + bool _908 = fabs(6.0f - s2.a.mA.mA.mA.columns[2].x) < 0.0500000007450580596923828125f; + bool _885; + if (_908) + { + _885 = fabs(5.0f - s2.a.mA.mA.mA.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _885 = _908; + } + bool _893; + if (_885) + { + _893 = fabs(7.0f - s2.a.mA.mA.mA.columns[2].z) < 0.0500000007450580596923828125f; + } + else + { + _893 = _885; + } + bool _901; + if (_893) + { + _901 = fabs((-2.0f) - s2.a.mA.mA.mA.columns[2].w) < 
0.0500000007450580596923828125f; + } + else + { + _901 = _893; + } + _720 = _901; + } + else + { + _720 = _712; + } + bool _728; + if (_720) + { + bool _975 = fabs((-4.0f) - s2.a.mA.mA.mA.columns[3].x) < 0.0500000007450580596923828125f; + bool _952; + if (_975) + { + _952 = fabs((-9.0f) - s2.a.mA.mA.mA.columns[3].y) < 0.0500000007450580596923828125f; + } + else + { + _952 = _975; + } + bool _960; + if (_952) + { + _960 = fabs(8.0f - s2.a.mA.mA.mA.columns[3].z) < 0.0500000007450580596923828125f; + } + else + { + _960 = _952; + } + bool _968; + if (_960) + { + _968 = fabs(3.0f - s2.a.mA.mA.mA.columns[3].w) < 0.0500000007450580596923828125f; + } + else + { + _968 = _960; + } + _728 = _968; + } + else + { + _728 = _720; + } + _453 = _728; + } + else + { + _453 = _444; + } + bool _462; + if (_453) + { + _462 = all((int3)(true, false, false) == s2.a.mA.mA.mB); + } + else + { + _462 = _453; + } + bool _471; + if (_462) + { + _471 = all((int4)(true, true, true, false) == s2.a.mA.mA.mC); + } + else + { + _471 = _462; + } + bool _480; + if (_471) + { + _480 = all((int2)(true) == s2.a.mA.mB.mA); + } + else + { + _480 = _471; + } + bool _489; + if (_480) + { + _489 = fabs(7.0f - s2.a.mA.mC.mA) < 0.0500000007450580596923828125f; + } + else + { + _489 = _480; + } + bool _498; + if (_489) + { + _498 = all((uint4)(8u, 6u, 2u, 0u) == s2.a.mA.mC.mB); + } + else + { + _498 = _489; + } + bool _507; + if (_498) + { + _507 = fabs((-9.0f) - s2.a.mA.mC.mC) < 0.0500000007450580596923828125f; + } + else + { + _507 = _498; + } + bool _516; + if (_507) + { + _516 = all((int3)(1, -4, 0) == s2.b); + } + else + { + _516 = _507; + } + bool _525; + if (_516) + { + _525 = all((uint3)(4u, 9u, 1u) == s2.c.mA.mA.mA); + } + else + { + _525 = _516; + } + bool _534; + if (_525) + { + _534 = false == s2.c.mA.mA.mB; + } + else + { + _534 = _525; + } + bool _543; + if (_534) + { + bool _1106 = fabs(3.0f - s2.c.mA.mB.columns[0].x) < 0.0500000007450580596923828125f; + bool _1099; + if (_1106) + { + _1099 = 
fabs((-5.0f) - s2.c.mA.mB.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _1099 = _1106; + } + bool _1072; + if (_1099) + { + bool _1139 = fabs((-1.0f) - s2.c.mA.mB.columns[1].x) < 0.0500000007450580596923828125f; + bool _1132; + if (_1139) + { + _1132 = fabs((-5.0f) - s2.c.mA.mB.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _1132 = _1139; + } + _1072 = _1132; + } + else + { + _1072 = _1099; + } + bool _1080; + if (_1072) + { + bool _1172 = fabs((-1.0f) - s2.c.mA.mB.columns[2].x) < 0.0500000007450580596923828125f; + bool _1165; + if (_1172) + { + _1165 = fabs((-9.0f) - s2.c.mA.mB.columns[2].y) < 0.0500000007450580596923828125f; + } + else + { + _1165 = _1172; + } + _1080 = _1165; + } + else + { + _1080 = _1072; + } + _543 = _1080; + } + else + { + _543 = _534; + } + bool _552; + if (_543) + { + bool _1205 = fabs((-6.0f) - s2.c.mB.x) < 0.0500000007450580596923828125f; + bool _1198; + if (_1205) + { + _1198 = fabs((-9.0f) - s2.c.mB.y) < 0.0500000007450580596923828125f; + } + else + { + _1198 = _1205; + } + _552 = _1198; + } + else + { + _552 = _543; + } + bool _561; + if (_552) + { + _561 = all((int2)(true, false) == s2.d.mA); + } + else + { + _561 = _552; + } + bool _570; + if (_561) + { + bool _1263 = fabs((-2.0f) - s2.d.mB.mA.mA.columns[0].x) < 0.0500000007450580596923828125f; + bool _1256; + if (_1263) + { + _1256 = fabs(3.0f - s2.d.mB.mA.mA.columns[0].y) < 0.0500000007450580596923828125f; + } + else + { + _1256 = _1263; + } + bool _1237; + if (_1256) + { + bool _1296 = fabs(7.0f - s2.d.mB.mA.mA.columns[1].x) < 0.0500000007450580596923828125f; + bool _1289; + if (_1296) + { + _1289 = fabs(2.0f - s2.d.mB.mA.mA.columns[1].y) < 0.0500000007450580596923828125f; + } + else + { + _1289 = _1296; + } + _1237 = _1289; + } + else + { + _1237 = _1256; + } + _570 = _1237; + } + else + { + _570 = _561; + } + bool _579; + if (_570) + { + _579 = all((int3)(false) == s2.d.mB.mA.mB); + } + else + { + _579 = _570; + } + bool _588; + if (_579) 
+ { + _588 = all((int4)(false, false, false, true) == s2.d.mB.mA.mC); + } + else + { + _588 = _579; + } + bool _597; + if (_588) + { + _597 = all((int3)(true, false, false) == s2.d.mB.mB); + } + else + { + _597 = _588; + } + bool _606; + if (_597) + { + _606 = all((int2)(-9, 0) == s2.d.mC); + } + else + { + _606 = _597; + } + if (_606) + { + _612[0] += as_uint(1); + } +} + diff --git a/reference/opt/shaders-opencl/comp/shared-std450.double.comp b/reference/opt/shaders-opencl/comp/shared-std450.fp64.comp similarity index 100% rename from reference/opt/shaders-opencl/comp/shared-std450.double.comp rename to reference/opt/shaders-opencl/comp/shared-std450.fp64.comp diff --git a/reference/opt/shaders-opencl/comp/struct-layout.comp b/reference/opt/shaders-opencl/comp/struct-layout.comp index 39cabe2a8..b2df43cd3 100644 --- a/reference/opt/shaders-opencl/comp/struct-layout.comp +++ b/reference/opt/shaders-opencl/comp/struct-layout.comp @@ -1,9 +1,11 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + struct Foo { - float4 m; + spvMat4 m; }; typedef struct Foo Foo; @@ -22,10 +24,25 @@ struct SSBO typedef struct SSBO SSBO; +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat4 spvMulMat4Mat4(spvMat4 a, spvMat4 b) +{ + spvMat4 r; + r.columns[0] = spvMulMat4Vec4(a, b.columns[0]); + r.columns[1] = spvMulMat4Vec4(a, b.columns[1]); + r.columns[2] = spvMulMat4Vec4(a, b.columns[2]); + r.columns[3] = spvMulMat4Vec4(a, b.columns[3]); + return r; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global Foo* _23, __global const Foo* _30) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m = _30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m * 
_30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m; + _23[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m = spvMulMat4Mat4(_30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m, _30[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].m); } diff --git a/reference/opt/shaders-opencl/comp/struct-packing.comp b/reference/opt/shaders-opencl/comp/struct-packing.comp new file mode 100644 index 000000000..3f0a147f0 --- /dev/null +++ b/reference/opt/shaders-opencl/comp/struct-packing.comp @@ -0,0 +1,125 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float2 columns[3]; } spvMat3x2; + +struct S0 +{ + float2 a[1]; + float b; +}; + +typedef struct S0 S0; + +struct S1 +{ + float3 a; + float b; +}; + +typedef struct S1 S1; + +struct S2 +{ + float3 a[1]; + float b; +}; + +typedef struct S2 S2; + +struct S3 +{ + float2 a; + float b; +}; + +typedef struct S3 S3; + +struct S4 +{ + float2 c; +}; + +typedef struct S4 S4; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + S4 m3s[8]; +}; + +typedef struct Content Content; + +struct SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + spvMat2 m0; + spvMat2 m1; + spvMat2x3 m2[4]; + spvMat3x2 m3; + spvMat2 m4; + spvMat2 m5[9]; + spvMat3x2 m6[4][2]; + spvMat2x3 m7; + float array[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[1]; +}; + +typedef struct SSBO0 SSBO0; + +static float3 spvMulVec2Mat3x2(float2 v, spvMat3x2 m) +{ + return (float3)(dot(v, m.columns[0]), dot(v, m.columns[1]), dot(v, m.columns[2])); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO1* ssbo_430, __global SSBO0* ssbo_140) +{ + uint3 spvWorkgroupSize = 
(uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + Content _60 = ssbo_140->content; + ssbo_430->content.m0s[0].a[0] = _60.m0s[0].a[0]; + ssbo_430->content.m0s[0].b = _60.m0s[0].b; + ssbo_430->content.m1s[0].a = _60.m1s[0].a; + ssbo_430->content.m1s[0].b = _60.m1s[0].b; + ssbo_430->content.m2s[0].a[0] = _60.m2s[0].a[0]; + ssbo_430->content.m2s[0].b = _60.m2s[0].b; + ssbo_430->content.m0.a[0] = _60.m0.a[0]; + ssbo_430->content.m0.b = _60.m0.b; + ssbo_430->content.m1.a = _60.m1.a; + ssbo_430->content.m1.b = _60.m1.b; + ssbo_430->content.m2.a[0] = _60.m2.a[0]; + ssbo_430->content.m2.b = _60.m2.b; + ssbo_430->content.m3.a = _60.m3.a; + ssbo_430->content.m3.b = _60.m3.b; + ssbo_430->content[7] = _60.m4; + ssbo_430->content.m3s[0].c = _60.m3s[0].c; + ssbo_430->content.m3s[1].c = _60.m3s[1].c; + ssbo_430->content.m3s[2].c = _60.m3s[2].c; + ssbo_430->content.m3s[3].c = _60.m3s[3].c; + ssbo_430->content.m3s[4].c = _60.m3s[4].c; + ssbo_430->content.m3s[5].c = _60.m3s[5].c; + ssbo_430->content.m3s[6].c = _60.m3s[6].c; + ssbo_430->content.m3s[7].c = _60.m3s[7].c; + ssbo_430->content.m1.a = spvMulVec2Mat3x2(ssbo_430->content.m3.a, ssbo_430->m6[1][1]); +} + diff --git a/reference/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp b/reference/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp new file mode 100644 index 000000000..a8926a145 --- /dev/null +++ b/reference/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct foo +{ + uint bar; + float3 baz; + uchar quux; + uchar4 blah; + half2 wibble; +}; + +typedef struct foo foo; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* _10) +{ + _10->bar = ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x; + _10->baz = convert_float3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))); + _10->blah = 
convert_uchar4((uint4)(convert_uint4(_10->blah).xyz + ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))), 0u)); + _10->wibble = convert_half2(convert_float2(_10->wibble) * convert_float2(((uint3)(get_num_groups(0), get_num_groups(1), get_num_groups(2))).xy)); +} + diff --git a/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp b/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp index 5bcad0013..6113e59a9 100644 --- a/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp +++ b/reference/shaders-opencl/asm/comp/replicated-composites.spv16.asm.comp @@ -1,6 +1,8 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + #ifndef SPIRV_CROSS_CONSTANT_ID_0 #define SPIRV_CROSS_CONSTANT_ID_0 0.0f #endif @@ -19,10 +21,10 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(UBO ubo) { float4 a = (float4)(0.0f); - float4 b = (float4)(1.0f); + spvMat4 b = (spvMat4){ { (float4)(1.0f), (float4)(1.0f), (float4)(1.0f), (float4)(1.0f) } }; float4 c = _20; float4 d = (float4)(ubo.uniform_float); - float4 e = d; + spvMat4 e = (spvMat4){ { d, d, d, d } }; float f[8] = {ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float, ubo.uniform_float}; } diff --git a/reference/shaders-opencl/comp/dowhile.comp b/reference/shaders-opencl/comp/dowhile.comp index 2dca8bcda..1e518a628 100644 --- a/reference/shaders-opencl/comp/dowhile.comp +++ b/reference/shaders-opencl/comp/dowhile.comp @@ -1,9 +1,11 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + struct SSBO { - float4 mvp; + spvMat4 mvp; float4 in_data[1]; }; @@ -16,19 +18,24 @@ struct SSBO2 typedef struct SSBO2 SSBO2; +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + 
__attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global const SSBO* _28, __global float4* _52) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - int i; - uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; - i = 0; - float4 idat = _28->in_data[ident]; + int i_1; + uint ident_1 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + i_1 = 0; + float4 idat_1 = _28->in_data[ident_1]; do { - idat = _28->mvp * idat; - i++; - } while (i < 16); - _52[ident] = idat; + idat_1 = spvMulMat4Vec4(_28->mvp, idat_1); + i_1++; + } while (i_1 < 16); + _52[ident_1] = idat_1; } diff --git a/reference/shaders-opencl/comp/inverse.comp b/reference/shaders-opencl/comp/inverse.comp new file mode 100644 index 000000000..3db4ff542 --- /dev/null +++ b/reference/shaders-opencl/comp/inverse.comp @@ -0,0 +1,53 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[3]; } spvMat3; +typedef struct { float4 columns[4]; } spvMat4; + +struct MatrixOut +{ + spvMat2 m2out; + spvMat3 m3out; + spvMat4 m4out; +}; + +typedef struct MatrixOut MatrixOut; + +struct MatrixIn +{ + spvMat2 m2in; + spvMat3 m3in; + spvMat4 m4in; +}; + +typedef struct MatrixIn MatrixIn; + +static spvMat2 spvInverse2(spvMat2 m) { + float d = 1.0f / (m.columns[0].x * m.columns[1].y - m.columns[1].x * m.columns[0].y); + return (spvMat2){ { (float2)(m.columns[1].y * d, -m.columns[0].y * d), (float2)(-m.columns[1].x * d, m.columns[0].x * d) } }; +} + +static spvMat3 spvInverse3(spvMat3 m) { + float3 t = (float3)(m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x); + float d = 1.0f / dot(m.columns[0], t); + return (spvMat3){ { t * d, (float3)(m.columns[0].z * m.columns[2].y - m.columns[0].y * 
m.columns[2].z, m.columns[0].x * m.columns[2].z - m.columns[0].z * m.columns[2].x, m.columns[0].y * m.columns[2].x - m.columns[0].x * m.columns[2].y) * d, (float3)(m.columns[0].y * m.columns[1].z - m.columns[0].z * m.columns[1].y, m.columns[0].z * m.columns[1].x - m.columns[0].x * m.columns[1].z, m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x) * d } }; +} + +static spvMat4 spvInverse4(spvMat4 m) { + float4 t = (float4)(m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, m.columns[2].x * m.columns[3].y * m.columns[1].w - m.columns[3].x * m.columns[2].y * m.columns[1].w + m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, m.columns[3].x * m.columns[2].y * m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z); + spvMat4 r = (spvMat4){ { (float4)(t.x, m.columns[3].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[0].z * m.columns[2].w + m.columns[0].y * m.columns[3].z * m.columns[2].w + m.columns[2].y * m.columns[0].z * m.columns[3].w - m.columns[0].y * m.columns[2].z * 
m.columns[3].w, m.columns[1].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[1].z * m.columns[0].w + m.columns[3].y * m.columns[0].z * m.columns[1].w - m.columns[0].y * m.columns[3].z * m.columns[1].w - m.columns[1].y * m.columns[0].z * m.columns[3].w + m.columns[0].y * m.columns[1].z * m.columns[3].w, m.columns[2].y * m.columns[1].z * m.columns[0].w - m.columns[1].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[0].z * m.columns[1].w + m.columns[0].y * m.columns[2].z * m.columns[1].w + m.columns[1].y * m.columns[0].z * m.columns[2].w - m.columns[0].y * m.columns[1].z * m.columns[2].w), (float4)(t.y, m.columns[2].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[2].z * m.columns[0].w + m.columns[3].x * m.columns[0].z * m.columns[2].w - m.columns[0].x * m.columns[3].z * m.columns[2].w - m.columns[2].x * m.columns[0].z * m.columns[3].w + m.columns[0].x * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[1].z * m.columns[0].w - m.columns[1].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[0].z * m.columns[1].w + m.columns[0].x * m.columns[3].z * m.columns[1].w + m.columns[1].x * m.columns[0].z * m.columns[3].w - m.columns[0].x * m.columns[1].z * m.columns[3].w, m.columns[1].x * m.columns[2].z * m.columns[0].w - m.columns[2].x * m.columns[1].z * m.columns[0].w + m.columns[2].x * m.columns[0].z * m.columns[1].w - m.columns[0].x * m.columns[2].z * m.columns[1].w - m.columns[1].x * m.columns[0].z * m.columns[2].w + m.columns[0].x * m.columns[1].z * m.columns[2].w), (float4)(t.z, m.columns[3].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[0].y * m.columns[2].w + m.columns[0].x * m.columns[3].y * m.columns[2].w + m.columns[2].x * m.columns[0].y * m.columns[3].w - m.columns[0].x * m.columns[2].y * m.columns[3].w, m.columns[1].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[1].y * m.columns[0].w + m.columns[3].x * 
m.columns[0].y * m.columns[1].w - m.columns[0].x * m.columns[3].y * m.columns[1].w - m.columns[1].x * m.columns[0].y * m.columns[3].w + m.columns[0].x * m.columns[1].y * m.columns[3].w, m.columns[2].x * m.columns[1].y * m.columns[0].w - m.columns[1].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[0].y * m.columns[1].w + m.columns[0].x * m.columns[2].y * m.columns[1].w + m.columns[1].x * m.columns[0].y * m.columns[2].w - m.columns[0].x * m.columns[1].y * m.columns[2].w), (float4)(t.w, m.columns[2].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[2].y * m.columns[0].z + m.columns[3].x * m.columns[0].y * m.columns[2].z - m.columns[0].x * m.columns[3].y * m.columns[2].z - m.columns[2].x * m.columns[0].y * m.columns[3].z + m.columns[0].x * m.columns[2].y * m.columns[3].z, m.columns[3].x * m.columns[1].y * m.columns[0].z - m.columns[1].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[0].y * m.columns[1].z + m.columns[0].x * m.columns[3].y * m.columns[1].z + m.columns[1].x * m.columns[0].y * m.columns[3].z - m.columns[0].x * m.columns[1].y * m.columns[3].z, m.columns[1].x * m.columns[2].y * m.columns[0].z - m.columns[2].x * m.columns[1].y * m.columns[0].z + m.columns[2].x * m.columns[0].y * m.columns[1].z - m.columns[0].x * m.columns[2].y * m.columns[1].z - m.columns[1].x * m.columns[0].y * m.columns[2].z + m.columns[0].x * m.columns[1].y * m.columns[2].z) } }; + float d = 1.0f / dot(m.columns[0], t); + r.columns[0] *= d; r.columns[1] *= d; r.columns[2] *= d; r.columns[3] *= d; + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global MatrixOut* _15, __global const MatrixIn* _20) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _15->m2out = spvInverse2(_20->m2in); + _15->m3out = spvInverse3(_20->m3in); + _15->m4out = spvInverse4(_20->m4in); +} + diff --git a/reference/shaders-opencl/comp/mat3-row-maj-read-write-const.comp 
b/reference/shaders-opencl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 000000000..eb4beccdd --- /dev/null +++ b/reference/shaders-opencl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,47 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[3]; } spvMat3; + +struct model_t +{ + spvMat3 mtx_rm; +}; + +typedef struct model_t model_t; + +static float3 spvMulMat3Vec3(spvMat3 m, float3 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z; +} + +static spvMat3 spvMulMat3Mat3(spvMat3 a, spvMat3 b) +{ + spvMat3 r; + r.columns[0] = spvMulMat3Vec3(a, b.columns[0]); + r.columns[1] = spvMulMat3Vec3(a, b.columns[1]); + r.columns[2] = spvMulMat3Vec3(a, b.columns[2]); + return r; +} + +static spvMat3 spvTransposeMat3(spvMat3 m) +{ + spvMat3 r; + r.columns[0] = (float3)(m.columns[0].x, m.columns[1].x, m.columns[2].x); + r.columns[1] = (float3)(m.columns[0].y, m.columns[1].y, m.columns[2].y); + r.columns[2] = (float3)(m.columns[0].z, m.columns[1].z, m.columns[2].z); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global spvMat3* model) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + spvMat3 mtx_cm_1 = spvTransposeMat3(model[0]); + spvMat3 mtx1_1 = spvMulMat3Mat3(mtx_cm_1, (spvMat3){ { (float3)(4.0f, -3.0f, 1.0f), (float3)(-7.0f, 7.0f, -7.0f), (float3)(-5.0f, 6.0f, -8.0f) } }); + if (mtx1_1.columns[0].x != 0.0f) + { + model[0] = spvTransposeMat3((spvMat3){ { (float3)(-5.0f, -3.0f, -5.0f), (float3)(-2.0f, 2.0f, -5.0f), (float3)(6.0f, 3.0f, -8.0f) } }); + } +} + diff --git a/reference/shaders-opencl/comp/mat3.comp b/reference/shaders-opencl/comp/mat3.comp new file mode 100644 index 000000000..12663175a --- /dev/null +++ b/reference/shaders-opencl/comp/mat3.comp @@ -0,0 +1,20 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[3]; } spvMat3; + +struct 
SSBO2 +{ + spvMat3 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global spvMat3* _22) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + _22[ident] = (spvMat3){ { (float3)(10.0f), (float3)(20.0f), (float3)(40.0f) } }; +} + diff --git a/reference/shaders-opencl/comp/outer-product.comp b/reference/shaders-opencl/comp/outer-product.comp index 8441e6d2d..d0ab225fd 100644 --- a/reference/shaders-opencl/comp/outer-product.comp +++ b/reference/shaders-opencl/comp/outer-product.comp @@ -1,17 +1,27 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float4 columns[2]; } spvMat2x4; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float3 columns[3]; } spvMat3; +typedef struct { float4 columns[3]; } spvMat3x4; +typedef struct { float2 columns[4]; } spvMat4x2; +typedef struct { float3 columns[4]; } spvMat4x3; +typedef struct { float4 columns[4]; } spvMat4; + struct SSBO { - float2 m22; - float3 m23; - float4 m24; - float2 m32; - float3 m33; - float4 m34; - float2 m42; - float3 m43; - float4 m44; + spvMat2 m22; + spvMat2x3 m23; + spvMat2x4 m24; + spvMat3x2 m32; + spvMat3 m33; + spvMat3x4 m34; + spvMat4x2 m42; + spvMat4x3 m43; + spvMat4 m44; }; typedef struct SSBO SSBO; @@ -25,18 +35,99 @@ struct ReadSSBO typedef struct ReadSSBO ReadSSBO; +static spvMat2 spvOuterProductVec2Vec2(float2 c, float2 r) +{ + spvMat2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + return m; +} + +static spvMat2x3 spvOuterProductVec3Vec2(float3 c, float2 r) +{ + spvMat2x3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + return m; +} + +static spvMat2x4 spvOuterProductVec4Vec2(float4 c, float2 r) +{ + spvMat2x4 m; + m.columns[0] = c * r.x; + 
m.columns[1] = c * r.y; + return m; +} + +static spvMat3x2 spvOuterProductVec2Vec3(float2 c, float3 r) +{ + spvMat3x2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + return m; +} + +static spvMat3 spvOuterProductVec3Vec3(float3 c, float3 r) +{ + spvMat3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + return m; +} + +static spvMat3x4 spvOuterProductVec4Vec3(float4 c, float3 r) +{ + spvMat3x4 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + return m; +} + +static spvMat4x2 spvOuterProductVec2Vec4(float2 c, float4 r) +{ + spvMat4x2 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + +static spvMat4x3 spvOuterProductVec3Vec4(float3 c, float4 r) +{ + spvMat4x3 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + +static spvMat4 spvOuterProductVec4Vec4(float4 c, float4 r) +{ + spvMat4 m; + m.columns[0] = c * r.x; + m.columns[1] = c * r.y; + m.columns[2] = c * r.z; + m.columns[3] = c * r.w; + return m; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global SSBO* _21, __global const ReadSSBO* _26) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - _21->m22 = _26->v2 * _26->v2.x; - _21->m23 = _26->v3 * _26->v2.x; - _21->m24 = _26->v4 * _26->v2.x; - _21->m32 = _26->v2 * _26->v3.x; - _21->m33 = _26->v3 * _26->v3.x; - _21->m34 = _26->v4 * _26->v3.x; - _21->m42 = _26->v2 * _26->v4.x; - _21->m43 = _26->v3 * _26->v4.x; - _21->m44 = _26->v4 * _26->v4.x; + _21->m22 = spvOuterProductVec2Vec2(_26->v2, _26->v2); + _21->m23 = spvOuterProductVec3Vec2(_26->v3, _26->v2); + _21->m24 = spvOuterProductVec4Vec2(_26->v4, _26->v2); + _21->m32 = spvOuterProductVec2Vec3(_26->v2, _26->v3); + _21->m33 = spvOuterProductVec3Vec3(_26->v3, _26->v3); + _21->m34 = 
spvOuterProductVec4Vec3(_26->v4, _26->v3); + _21->m42 = spvOuterProductVec2Vec4(_26->v2, _26->v4); + _21->m43 = spvOuterProductVec3Vec4(_26->v3, _26->v4); + _21->m44 = spvOuterProductVec4Vec4(_26->v4, _26->v4); } diff --git a/reference/shaders-opencl/comp/rmw-matrix.comp b/reference/shaders-opencl/comp/rmw-matrix.comp new file mode 100644 index 000000000..9fdc47c62 --- /dev/null +++ b/reference/shaders-opencl/comp/rmw-matrix.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[4]; } spvMat4; + +struct SSBO +{ + float a; + float4 b; + spvMat4 c; + float a1; + float4 b1; + spvMat4 c1; +}; + +typedef struct SSBO SSBO; + +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat4 spvMulMat4Mat4(spvMat4 a, spvMat4 b) +{ + spvMat4 r; + r.columns[0] = spvMulMat4Vec4(a, b.columns[0]); + r.columns[1] = spvMulMat4Vec4(a, b.columns[1]); + r.columns[2] = spvMulMat4Vec4(a, b.columns[2]); + r.columns[3] = spvMulMat4Vec4(a, b.columns[3]); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _11) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _11->a *= _11->a1; + _11->b *= _11->b1; + _11->c = spvMulMat4Mat4(_11->c, _11->c1); +} + diff --git a/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp b/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp index 59f3fb7ed..c607a22f3 100644 --- a/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp +++ b/reference/shaders-opencl/comp/scalar-std450-distance-length-normalize.comp @@ -17,9 +17,9 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global SSBO* _9) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - _9->c = distance(_9->a, 
_9->b); - _9->d = length(_9->a); - _9->e = normalize(_9->a); - _9->f = distance(_9->a - 1.0f, _9->b - 2.0f); + _9->c = fabs(_9->a - _9->b); + _9->d = fabs(_9->a); + _9->e = sign(_9->a); + _9->f = fabs(_9->a - 1.0f - _9->b - 2.0f); } diff --git a/reference/shaders-opencl/comp/shared-matrix-array-of-array.comp b/reference/shaders-opencl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 000000000..f474aede3 --- /dev/null +++ b/reference/shaders-opencl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,357 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[4]; } spvMat4x3; + +struct S1 +{ + spvMat4x3 a[2]; + float b; + float2 c[3]; +}; + +typedef struct S1 S1; + +struct S2 +{ + int4 a; + bool b[3][1][3]; +}; + +typedef struct S2 S2; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +bool compare_float( float* a_6, float* b_6) +{ + return fabs((*a_6) - (*b_6)) < 0.0500000007450580596923828125f; +} + +bool compare_vec3( float3* a_1_1, float3* b_1_1) +{ + float param_50 = (*a_1_1).x; + float param_1_1 = (*b_1_1).x; + bool _85 = compare_float(¶m_50, ¶m_1_1); + bool _95; + if (_85) + { + float param_2_1 = (*a_1_1).y; + float param_3_1 = (*b_1_1).y; + _95 = compare_float(¶m_2_1, ¶m_3_1); + } + else + { + _95 = _85; + } + bool _106; + if (_95) + { + float param_4_1 = (*a_1_1).z; + float param_5_1 = (*b_1_1).z; + _106 = compare_float(¶m_4_1, ¶m_5_1); + } + else + { + _106 = _95; + } + return _106; +} + +bool compare_mat4x3( spvMat4x3* a_2_1, spvMat4x3* b_2_1) +{ + float3 param_6_1 = (*a_2_1).columns[0]; + float3 param_7_1 = (*b_2_1).columns[0]; + bool _116 = compare_vec3(¶m_6_1, ¶m_7_1); + bool _127; + if (_116) + { + float3 param_8_1 = (*a_2_1).columns[1]; + float3 param_9_1 = (*b_2_1).columns[1]; + _127 = compare_vec3(¶m_8_1, ¶m_9_1); + } + else + { + _127 = _116; + } + bool _138; + if (_127) + { + float3 param_10_1 = (*a_2_1).columns[2]; + float3 param_11_1 = (*b_2_1).columns[2]; + _138 = 
compare_vec3(¶m_10_1, ¶m_11_1); + } + else + { + _138 = _127; + } + bool _149; + if (_138) + { + float3 param_12_1 = (*a_2_1).columns[3]; + float3 param_13_1 = (*b_2_1).columns[3]; + _149 = compare_vec3(¶m_12_1, ¶m_13_1); + } + else + { + _149 = _138; + } + return _149; +} + +bool compare_vec2( float2* a_3_1, float2* b_3_1) +{ + float param_14_1 = (*a_3_1).x; + float param_15_1 = (*b_3_1).x; + bool _65 = compare_float(¶m_14_1, ¶m_15_1); + bool _76; + if (_65) + { + float param_16_1 = (*a_3_1).y; + float param_17_1 = (*b_3_1).y; + _76 = compare_float(¶m_16_1, ¶m_17_1); + } + else + { + _76 = _65; + } + return _76; +} + +bool compare_ivec4( int4* a_4_1, int4* b_4_1) +{ + return all((*a_4_1) == (*b_4_1)); +} + +bool compare_bool( bool* a_5_1, bool* b_5_1) +{ + return (*a_5_1) == (*b_5_1); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _383) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + __local S2 s2; + s1.a[0] = (spvMat4x3){ { (float3)(0.0f, 2.0f, -8.0f), (float3)(6.0f, 7.0f, 5.0f), (float3)(-6.0f, 1.0f, 9.0f), (float3)(-4.0f, -3.0f, 4.0f) } }; + s1.a[1] = (spvMat4x3){ { (float3)(4.0f, 9.0f, -9.0f), (float3)(-8.0f, -9.0f, 8.0f), (float3)(0.0f, 4.0f, -4.0f), (float3)(7.0f, 2.0f, -1.0f) } }; + s1.b = 7.0f; + s1.c[0] = (float2)(-5.0f, -4.0f); + s1.c[1] = (float2)(3.0f, -5.0f); + s1.c[2] = (float2)(-3.0f, -1.0f); + s2.a = (int4)(1, 0, -3, 1); + s2.b[0][0][0] = true; + s2.b[0][0][1] = false; + s2.b[0][0][2] = false; + s2.b[1][0][0] = true; + s2.b[1][0][1] = false; + s2.b[1][0][2] = true; + s2.b[2][0][0] = false; + s2.b[2][0][1] = true; + s2.b[2][0][2] = true; + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool allOk_1 = true; + bool _242; + if (allOk_1) + { + spvMat4x3 param_18_1 = (spvMat4x3){ { (float3)(0.0f, 2.0f, -8.0f), (float3)(6.0f, 7.0f, 5.0f), (float3)(-6.0f, 1.0f, 9.0f), (float3)(-4.0f, -3.0f, 4.0f) } }; + 
spvMat4x3 param_19_1 = s1.a[0]; + _242 = compare_mat4x3(¶m_18_1, ¶m_19_1); + } + else + { + _242 = allOk_1; + } + allOk_1 = _242; + bool _251; + if (allOk_1) + { + spvMat4x3 param_20_1 = (spvMat4x3){ { (float3)(4.0f, 9.0f, -9.0f), (float3)(-8.0f, -9.0f, 8.0f), (float3)(0.0f, 4.0f, -4.0f), (float3)(7.0f, 2.0f, -1.0f) } }; + spvMat4x3 param_21_1 = s1.a[1]; + _251 = compare_mat4x3(¶m_20_1, ¶m_21_1); + } + else + { + _251 = allOk_1; + } + allOk_1 = _251; + bool _260; + if (allOk_1) + { + float param_22_1 = 7.0f; + float param_23_1 = s1.b; + _260 = compare_float(¶m_22_1, ¶m_23_1); + } + else + { + _260 = allOk_1; + } + allOk_1 = _260; + bool _269; + if (allOk_1) + { + float2 param_24_1 = (float2)(-5.0f, -4.0f); + float2 param_25_1 = s1.c[0]; + _269 = compare_vec2(¶m_24_1, ¶m_25_1); + } + else + { + _269 = allOk_1; + } + allOk_1 = _269; + bool _278; + if (allOk_1) + { + float2 param_26_1 = (float2)(3.0f, -5.0f); + float2 param_27_1 = s1.c[1]; + _278 = compare_vec2(¶m_26_1, ¶m_27_1); + } + else + { + _278 = allOk_1; + } + allOk_1 = _278; + bool _287; + if (allOk_1) + { + float2 param_28_1 = (float2)(-3.0f, -1.0f); + float2 param_29_1 = s1.c[2]; + _287 = compare_vec2(¶m_28_1, ¶m_29_1); + } + else + { + _287 = allOk_1; + } + allOk_1 = _287; + bool _296; + if (allOk_1) + { + int4 param_30_1 = (int4)(1, 0, -3, 1); + int4 param_31_1 = s2.a; + _296 = compare_ivec4(¶m_30_1, ¶m_31_1); + } + else + { + _296 = allOk_1; + } + allOk_1 = _296; + bool _305; + if (allOk_1) + { + bool param_32_1 = true; + bool param_33_1 = s2.b[0][0][0]; + _305 = compare_bool(¶m_32_1, ¶m_33_1); + } + else + { + _305 = allOk_1; + } + allOk_1 = _305; + bool _314; + if (allOk_1) + { + bool param_34_1 = false; + bool param_35_1 = s2.b[0][0][1]; + _314 = compare_bool(¶m_34_1, ¶m_35_1); + } + else + { + _314 = allOk_1; + } + allOk_1 = _314; + bool _323; + if (allOk_1) + { + bool param_36_1 = false; + bool param_37_1 = s2.b[0][0][2]; + _323 = compare_bool(¶m_36_1, ¶m_37_1); + } + else + { + _323 = allOk_1; + } 
+ allOk_1 = _323; + bool _332; + if (allOk_1) + { + bool param_38_1 = true; + bool param_39_1 = s2.b[1][0][0]; + _332 = compare_bool(¶m_38_1, ¶m_39_1); + } + else + { + _332 = allOk_1; + } + allOk_1 = _332; + bool _341; + if (allOk_1) + { + bool param_40_1 = false; + bool param_41_1 = s2.b[1][0][1]; + _341 = compare_bool(¶m_40_1, ¶m_41_1); + } + else + { + _341 = allOk_1; + } + allOk_1 = _341; + bool _350; + if (allOk_1) + { + bool param_42_1 = true; + bool param_43_1 = s2.b[1][0][2]; + _350 = compare_bool(¶m_42_1, ¶m_43_1); + } + else + { + _350 = allOk_1; + } + allOk_1 = _350; + bool _359; + if (allOk_1) + { + bool param_44_1 = false; + bool param_45_1 = s2.b[2][0][0]; + _359 = compare_bool(¶m_44_1, ¶m_45_1); + } + else + { + _359 = allOk_1; + } + allOk_1 = _359; + bool _368; + if (allOk_1) + { + bool param_46_1 = true; + bool param_47_1 = s2.b[2][0][1]; + _368 = compare_bool(¶m_46_1, ¶m_47_1); + } + else + { + _368 = allOk_1; + } + allOk_1 = _368; + bool _377; + if (allOk_1) + { + bool param_48_1 = true; + bool param_49_1 = s2.b[2][0][2]; + _377 = compare_bool(¶m_48_1, ¶m_49_1); + } + else + { + _377 = allOk_1; + } + allOk_1 = _377; + if (allOk_1) + { + _383[0] += as_uint(1); + } +} + diff --git a/reference/shaders-opencl/comp/shared-matrix-cast.comp b/reference/shaders-opencl/comp/shared-matrix-cast.comp new file mode 100644 index 000000000..d5404ae8a --- /dev/null +++ b/reference/shaders-opencl/comp/shared-matrix-cast.comp @@ -0,0 +1,174 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[3]; } spvMat3x2; + +struct S1 +{ + float4 a; + spvMat3x2 b; + int4 c; +}; + +typedef struct S1 S1; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +bool compare_float( float* a_5, float* b_5) +{ + return fabs((*a_5) - (*b_5)) < 0.0500000007450580596923828125f; +} + +bool compare_vec4( float4* a_1_1, float4* b_1_1) +{ + float param_24 = (*a_1_1).x; + float param_1_1 = (*b_1_1).x; + bool _78 = 
compare_float(¶m_24, ¶m_1_1); + bool _88; + if (_78) + { + float param_2_1 = (*a_1_1).y; + float param_3_1 = (*b_1_1).y; + _88 = compare_float(¶m_2_1, ¶m_3_1); + } + else + { + _88 = _78; + } + bool _99; + if (_88) + { + float param_4_1 = (*a_1_1).z; + float param_5_1 = (*b_1_1).z; + _99 = compare_float(¶m_4_1, ¶m_5_1); + } + else + { + _99 = _88; + } + bool _110; + if (_99) + { + float param_6_1 = (*a_1_1).w; + float param_7_1 = (*b_1_1).w; + _110 = compare_float(¶m_6_1, ¶m_7_1); + } + else + { + _110 = _99; + } + return _110; +} + +bool compare_vec2( float2* a_2_1, float2* b_2_1) +{ + float param_8_1 = (*a_2_1).x; + float param_9_1 = (*b_2_1).x; + bool _58 = compare_float(¶m_8_1, ¶m_9_1); + bool _69; + if (_58) + { + float param_10_1 = (*a_2_1).y; + float param_11_1 = (*b_2_1).y; + _69 = compare_float(¶m_10_1, ¶m_11_1); + } + else + { + _69 = _58; + } + return _69; +} + +bool compare_mat3x2( spvMat3x2* a_3_1, spvMat3x2* b_3_1) +{ + float2 param_12_1 = (*a_3_1).columns[0]; + float2 param_13_1 = (*b_3_1).columns[0]; + bool _121 = compare_vec2(¶m_12_1, ¶m_13_1); + bool _132; + if (_121) + { + float2 param_14_1 = (*a_3_1).columns[1]; + float2 param_15_1 = (*b_3_1).columns[1]; + _132 = compare_vec2(¶m_14_1, ¶m_15_1); + } + else + { + _132 = _121; + } + bool _143; + if (_132) + { + float2 param_16_1 = (*a_3_1).columns[2]; + float2 param_17_1 = (*b_3_1).columns[2]; + _143 = compare_vec2(¶m_16_1, ¶m_17_1); + } + else + { + _143 = _132; + } + return _143; +} + +bool compare_bvec4( int4* a_4_1, int4* b_4_1) +{ + return all((*a_4_1) == (*b_4_1)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _212) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a = (float4)(1.0f, -5.0f, -9.0f, -5.0f); + s1.b = (spvMat3x2){ { (float2)(1.0f, -7.0f), (float2)(1.0f, 2.0f), (float2)(8.0f, 7.0f) } }; + s1.c = (int4)(false, true, false, false); + barrier(CLK_LOCAL_MEM_FENCE); + 
mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool allOk_1 = true; + bool _188; + if (allOk_1) + { + float4 param_18_1 = (float4)(1.0f, -5.0f, -9.0f, -5.0f); + float4 param_19_1 = s1.a; + _188 = compare_vec4(¶m_18_1, ¶m_19_1); + } + else + { + _188 = allOk_1; + } + allOk_1 = _188; + bool _197; + if (allOk_1) + { + spvMat3x2 param_20_1 = (spvMat3x2){ { (float2)(1.0f, -7.0f), (float2)(1.0f, 2.0f), (float2)(8.0f, 7.0f) } }; + spvMat3x2 param_21_1 = s1.b; + _197 = compare_mat3x2(¶m_20_1, ¶m_21_1); + } + else + { + _197 = allOk_1; + } + allOk_1 = _197; + bool _206; + if (allOk_1) + { + int4 param_22_1 = (int4)(false, true, false, false); + int4 param_23_1 = s1.c; + _206 = compare_bvec4(¶m_22_1, ¶m_23_1); + } + else + { + _206 = allOk_1; + } + allOk_1 = _206; + if (allOk_1) + { + _212[0] += as_uint(1); + } +} + diff --git a/reference/shaders-opencl/comp/shared-matrix-nested-struct-array.comp b/reference/shaders-opencl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 000000000..9b2d8f159 --- /dev/null +++ b/reference/shaders-opencl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,401 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float3 columns[4]; } spvMat4x3; + +struct sA +{ + spvMat2x3 mA; +}; + +typedef struct sA sA; + +struct sB +{ + spvMat2 mA; + spvMat3x2 mB; + uint3 mC; +}; + +typedef struct sB sB; + +struct sC +{ + sA mA; + sB mB; +}; + +typedef struct sC sC; + +struct sD +{ + sC mA; +}; + +typedef struct sD sD; + +struct sE +{ + spvMat3x2 mA; + spvMat4x3 mB; +}; + +typedef struct sE sE; + +struct sF +{ + sE mA; +}; + +typedef struct sF sF; + +struct sG +{ + sF mA; +}; + +typedef struct sG sG; + +struct sH +{ + int3 mA[2]; +}; + +typedef struct sH sH; + +struct S1 +{ + sD a; + sG b; + sH c[2]; +}; + +typedef struct S1 S1; + +struct block +{ + 
uint passed; +}; + +typedef struct block block; + +bool compare_float( float* a_9, float* b_9) +{ + return fabs((*a_9) - (*b_9)) < 0.0500000007450580596923828125f; +} + +bool compare_vec3( float3* a_1_1, float3* b_1_1) +{ + float param_52 = (*a_1_1).x; + float param_1_1 = (*b_1_1).x; + bool _106 = compare_float(¶m_52, ¶m_1_1); + bool _116; + if (_106) + { + float param_2_1 = (*a_1_1).y; + float param_3_1 = (*b_1_1).y; + _116 = compare_float(¶m_2_1, ¶m_3_1); + } + else + { + _116 = _106; + } + bool _127; + if (_116) + { + float param_4_1 = (*a_1_1).z; + float param_5_1 = (*b_1_1).z; + _127 = compare_float(¶m_4_1, ¶m_5_1); + } + else + { + _127 = _116; + } + return _127; +} + +bool compare_mat2x3( spvMat2x3* a_2_1, spvMat2x3* b_2_1) +{ + float3 param_6_1 = (*a_2_1).columns[0]; + float3 param_7_1 = (*b_2_1).columns[0]; + bool _158 = compare_vec3(¶m_6_1, ¶m_7_1); + bool _168; + if (_158) + { + float3 param_8_1 = (*a_2_1).columns[1]; + float3 param_9_1 = (*b_2_1).columns[1]; + _168 = compare_vec3(¶m_8_1, ¶m_9_1); + } + else + { + _168 = _158; + } + return _168; +} + +bool compare_vec2( float2* a_3_1, float2* b_3_1) +{ + float param_10_1 = (*a_3_1).x; + float param_11_1 = (*b_3_1).x; + bool _86 = compare_float(¶m_10_1, ¶m_11_1); + bool _97; + if (_86) + { + float param_12_1 = (*a_3_1).y; + float param_13_1 = (*b_3_1).y; + _97 = compare_float(¶m_12_1, ¶m_13_1); + } + else + { + _97 = _86; + } + return _97; +} + +bool compare_mat2( spvMat2* a_4_1, spvMat2* b_4_1) +{ + float2 param_14_1 = (*a_4_1).columns[0]; + float2 param_15_1 = (*b_4_1).columns[0]; + bool _138 = compare_vec2(¶m_14_1, ¶m_15_1); + bool _149; + if (_138) + { + float2 param_16_1 = (*a_4_1).columns[1]; + float2 param_17_1 = (*b_4_1).columns[1]; + _149 = compare_vec2(¶m_16_1, ¶m_17_1); + } + else + { + _149 = _138; + } + return _149; +} + +bool compare_mat3x2( spvMat3x2* a_5_1, spvMat3x2* b_5_1) +{ + float2 param_18_1 = (*a_5_1).columns[0]; + float2 param_19_1 = (*b_5_1).columns[0]; + bool _177 = 
compare_vec2(¶m_18_1, ¶m_19_1); + bool _187; + if (_177) + { + float2 param_20_1 = (*a_5_1).columns[1]; + float2 param_21_1 = (*b_5_1).columns[1]; + _187 = compare_vec2(¶m_20_1, ¶m_21_1); + } + else + { + _187 = _177; + } + bool _198; + if (_187) + { + float2 param_22_1 = (*a_5_1).columns[2]; + float2 param_23_1 = (*b_5_1).columns[2]; + _198 = compare_vec2(¶m_22_1, ¶m_23_1); + } + else + { + _198 = _187; + } + return _198; +} + +bool compare_uvec3( uint3* a_6_1, uint3* b_6_1) +{ + return all((*a_6_1) == (*b_6_1)); +} + +bool compare_mat4x3( spvMat4x3* a_7_1, spvMat4x3* b_7_1) +{ + float3 param_24_1 = (*a_7_1).columns[0]; + float3 param_25_1 = (*b_7_1).columns[0]; + bool _207 = compare_vec3(¶m_24_1, ¶m_25_1); + bool _217; + if (_207) + { + float3 param_26_1 = (*a_7_1).columns[1]; + float3 param_27_1 = (*b_7_1).columns[1]; + _217 = compare_vec3(¶m_26_1, ¶m_27_1); + } + else + { + _217 = _207; + } + bool _227; + if (_217) + { + float3 param_28_1 = (*a_7_1).columns[2]; + float3 param_29_1 = (*b_7_1).columns[2]; + _227 = compare_vec3(¶m_28_1, ¶m_29_1); + } + else + { + _227 = _217; + } + bool _238; + if (_227) + { + float3 param_30_1 = (*a_7_1).columns[3]; + float3 param_31_1 = (*b_7_1).columns[3]; + _238 = compare_vec3(¶m_30_1, ¶m_31_1); + } + else + { + _238 = _227; + } + return _238; +} + +bool compare_bvec3( int3* a_8_1, int3* b_8_1) +{ + return all((*a_8_1) == (*b_8_1)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _424) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + s1.a.mA.mA.mA = (spvMat2x3){ { (float3)(6.0f, 8.0f, 8.0f), (float3)(0.0f, -4.0f, -5.0f) } }; + s1.a.mA.mB.mA = (spvMat2){ { (float2)(9.0f, -4.0f), (float2)(-6.0f, -1.0f) } }; + s1.a.mA.mB.mB = (spvMat3x2){ { (float2)(-1.0f, -2.0f), (float2)(1.0f, 6.0f), (float2)(5.0f, 7.0f) } }; + s1.a.mA.mB.mC = (uint3)(3u, 1u, 5u); + s1.b.mA.mA.mA = (spvMat3x2){ { (float2)(8.0f, 3.0f), (float2)(0.0f, 
2.0f), (float2)(1.0f, 8.0f) } }; + s1.b.mA.mA.mB = (spvMat4x3){ { (float3)(0.0f, 9.0f, -1.0f), (float3)(-1.0f, -7.0f, 7.0f), (float3)(-4.0f, -3.0f, 1.0f), (float3)(-4.0f, -9.0f, 1.0f) } }; + s1.c[0].mA[0] = (int3)(true, false, false); + s1.c[0].mA[1] = (int3)(true, false, false); + s1.c[1].mA[0] = (int3)(false); + s1.c[1].mA[1] = (int3)(false); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool allOk_1 = true; + bool _337; + if (allOk_1) + { + spvMat2x3 param_32_1 = (spvMat2x3){ { (float3)(6.0f, 8.0f, 8.0f), (float3)(0.0f, -4.0f, -5.0f) } }; + spvMat2x3 param_33_1 = s1.a.mA.mA.mA; + _337 = compare_mat2x3(¶m_32_1, ¶m_33_1); + } + else + { + _337 = allOk_1; + } + allOk_1 = _337; + bool _346; + if (allOk_1) + { + spvMat2 param_34_1 = (spvMat2){ { (float2)(9.0f, -4.0f), (float2)(-6.0f, -1.0f) } }; + spvMat2 param_35_1 = s1.a.mA.mB.mA; + _346 = compare_mat2(¶m_34_1, ¶m_35_1); + } + else + { + _346 = allOk_1; + } + allOk_1 = _346; + bool _355; + if (allOk_1) + { + spvMat3x2 param_36_1 = (spvMat3x2){ { (float2)(-1.0f, -2.0f), (float2)(1.0f, 6.0f), (float2)(5.0f, 7.0f) } }; + spvMat3x2 param_37_1 = s1.a.mA.mB.mB; + _355 = compare_mat3x2(¶m_36_1, ¶m_37_1); + } + else + { + _355 = allOk_1; + } + allOk_1 = _355; + bool _364; + if (allOk_1) + { + uint3 param_38_1 = (uint3)(3u, 1u, 5u); + uint3 param_39_1 = s1.a.mA.mB.mC; + _364 = compare_uvec3(¶m_38_1, ¶m_39_1); + } + else + { + _364 = allOk_1; + } + allOk_1 = _364; + bool _373; + if (allOk_1) + { + spvMat3x2 param_40_1 = (spvMat3x2){ { (float2)(8.0f, 3.0f), (float2)(0.0f, 2.0f), (float2)(1.0f, 8.0f) } }; + spvMat3x2 param_41_1 = s1.b.mA.mA.mA; + _373 = compare_mat3x2(¶m_40_1, ¶m_41_1); + } + else + { + _373 = allOk_1; + } + allOk_1 = _373; + bool _382; + if (allOk_1) + { + spvMat4x3 param_42_1 = (spvMat4x3){ { (float3)(0.0f, 9.0f, -1.0f), (float3)(-1.0f, -7.0f, 7.0f), (float3)(-4.0f, -3.0f, 1.0f), (float3)(-4.0f, -9.0f, 1.0f) } }; + spvMat4x3 param_43_1 = s1.b.mA.mA.mB; + _382 = 
compare_mat4x3(¶m_42_1, ¶m_43_1); + } + else + { + _382 = allOk_1; + } + allOk_1 = _382; + bool _391; + if (allOk_1) + { + int3 param_44_1 = (int3)(true, false, false); + int3 param_45_1 = s1.c[0].mA[0]; + _391 = compare_bvec3(¶m_44_1, ¶m_45_1); + } + else + { + _391 = allOk_1; + } + allOk_1 = _391; + bool _400; + if (allOk_1) + { + int3 param_46_1 = (int3)(true, false, false); + int3 param_47_1 = s1.c[0].mA[1]; + _400 = compare_bvec3(¶m_46_1, ¶m_47_1); + } + else + { + _400 = allOk_1; + } + allOk_1 = _400; + bool _409; + if (allOk_1) + { + int3 param_48_1 = (int3)(false); + int3 param_49_1 = s1.c[1].mA[0]; + _409 = compare_bvec3(¶m_48_1, ¶m_49_1); + } + else + { + _409 = allOk_1; + } + allOk_1 = _409; + bool _418; + if (allOk_1) + { + int3 param_50_1 = (int3)(false); + int3 param_51_1 = s1.c[1].mA[1]; + _418 = compare_bvec3(¶m_50_1, ¶m_51_1); + } + else + { + _418 = allOk_1; + } + allOk_1 = _418; + if (allOk_1) + { + _424[0] += as_uint(1); + } +} + diff --git a/reference/shaders-opencl/comp/shared-matrix-nested-struct.comp b/reference/shaders-opencl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 000000000..b9f2423e4 --- /dev/null +++ b/reference/shaders-opencl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,598 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float2 columns[3]; } spvMat3x2; +typedef struct { float4 columns[4]; } spvMat4; + +struct S1 +{ + uint a; + float4 b; +}; + +typedef struct S1 S1; + +struct sA +{ + spvMat4 mA; + int3 mB; + int4 mC; +}; + +typedef struct sA sA; + +struct sB +{ + int2 mA; +}; + +typedef struct sB sB; + +struct sC +{ + float mA; + uint4 mB; + float mC; +}; + +typedef struct sC sC; + +struct sD +{ + sA mA; + sB mB; + sC mC; +}; + +typedef struct sD sD; + +struct sE +{ + sD mA; +}; + +typedef struct sE sE; + +struct sF +{ + uint3 mA; + bool mB; +}; + +typedef struct sF sF; + +struct sG +{ + sF mA; + spvMat3x2 mB; +}; + +typedef 
struct sG sG; + +struct sH +{ + sG mA; + float2 mB; +}; + +typedef struct sH sH; + +struct sI +{ + spvMat2 mA; + int3 mB; + int4 mC; +}; + +typedef struct sI sI; + +struct sJ +{ + sI mA; + int3 mB; +}; + +typedef struct sJ sJ; + +struct sK +{ + int2 mA; + sJ mB; + int2 mC; +}; + +typedef struct sK sK; + +struct S2 +{ + sE a; + int3 b; + sH c; + sK d; +}; + +typedef struct S2 S2; + +struct block +{ + uint passed; +}; + +typedef struct block block; + +bool compare_uint( uint* a_15, uint* b_15) +{ + return (*a_15) == (*b_15); +} + +bool compare_float( float* a_1_1, float* b_1_1) +{ + return fabs((*a_1_1) - (*b_1_1)) < 0.0500000007450580596923828125f; +} + +bool compare_vec4( float4* a_2_1, float4* b_2_1) +{ + float param_70 = (*a_2_1).x; + float param_1_1 = (*b_2_1).x; + bool _147 = compare_float(¶m_70, ¶m_1_1); + bool _157; + if (_147) + { + float param_2_1 = (*a_2_1).y; + float param_3_1 = (*b_2_1).y; + _157 = compare_float(¶m_2_1, ¶m_3_1); + } + else + { + _157 = _147; + } + bool _168; + if (_157) + { + float param_4_1 = (*a_2_1).z; + float param_5_1 = (*b_2_1).z; + _168 = compare_float(¶m_4_1, ¶m_5_1); + } + else + { + _168 = _157; + } + bool _179; + if (_168) + { + float param_6_1 = (*a_2_1).w; + float param_7_1 = (*b_2_1).w; + _179 = compare_float(¶m_6_1, ¶m_7_1); + } + else + { + _179 = _168; + } + return _179; +} + +bool compare_mat4( spvMat4* a_3_1, spvMat4* b_3_1) +{ + float4 param_8_1 = (*a_3_1).columns[0]; + float4 param_9_1 = (*b_3_1).columns[0]; + bool _239 = compare_vec4(¶m_8_1, ¶m_9_1); + bool _249; + if (_239) + { + float4 param_10_1 = (*a_3_1).columns[1]; + float4 param_11_1 = (*b_3_1).columns[1]; + _249 = compare_vec4(¶m_10_1, ¶m_11_1); + } + else + { + _249 = _239; + } + bool _259; + if (_249) + { + float4 param_12_1 = (*a_3_1).columns[2]; + float4 param_13_1 = (*b_3_1).columns[2]; + _259 = compare_vec4(¶m_12_1, ¶m_13_1); + } + else + { + _259 = _249; + } + bool _270; + if (_259) + { + float4 param_14_1 = (*a_3_1).columns[3]; + float4 param_15_1 = 
(*b_3_1).columns[3]; + _270 = compare_vec4(¶m_14_1, ¶m_15_1); + } + else + { + _270 = _259; + } + return _270; +} + +bool compare_bvec3( int3* a_4_1, int3* b_4_1) +{ + return all((*a_4_1) == (*b_4_1)); +} + +bool compare_bvec4( int4* a_5_1, int4* b_5_1) +{ + return all((*a_5_1) == (*b_5_1)); +} + +bool compare_bvec2( int2* a_6_1, int2* b_6_1) +{ + return all((*a_6_1) == (*b_6_1)); +} + +bool compare_uvec4( uint4* a_7_1, uint4* b_7_1) +{ + return all((*a_7_1) == (*b_7_1)); +} + +bool compare_ivec3( int3* a_8_1, int3* b_8_1) +{ + return all((*a_8_1) == (*b_8_1)); +} + +bool compare_uvec3( uint3* a_9_1, uint3* b_9_1) +{ + return all((*a_9_1) == (*b_9_1)); +} + +bool compare_bool( bool* a_10_1, bool* b_10_1) +{ + return (*a_10_1) == (*b_10_1); +} + +bool compare_vec2( float2* a_11_1, float2* b_11_1) +{ + float param_16_1 = (*a_11_1).x; + float param_17_1 = (*b_11_1).x; + bool _127 = compare_float(¶m_16_1, ¶m_17_1); + bool _138; + if (_127) + { + float param_18_1 = (*a_11_1).y; + float param_19_1 = (*b_11_1).y; + _138 = compare_float(¶m_18_1, ¶m_19_1); + } + else + { + _138 = _127; + } + return _138; +} + +bool compare_mat3x2( spvMat3x2* a_12_1, spvMat3x2* b_12_1) +{ + float2 param_20_1 = (*a_12_1).columns[0]; + float2 param_21_1 = (*b_12_1).columns[0]; + bool _209 = compare_vec2(¶m_20_1, ¶m_21_1); + bool _219; + if (_209) + { + float2 param_22_1 = (*a_12_1).columns[1]; + float2 param_23_1 = (*b_12_1).columns[1]; + _219 = compare_vec2(¶m_22_1, ¶m_23_1); + } + else + { + _219 = _209; + } + bool _230; + if (_219) + { + float2 param_24_1 = (*a_12_1).columns[2]; + float2 param_25_1 = (*b_12_1).columns[2]; + _230 = compare_vec2(¶m_24_1, ¶m_25_1); + } + else + { + _230 = _219; + } + return _230; +} + +bool compare_mat2( spvMat2* a_13_1, spvMat2* b_13_1) +{ + float2 param_26_1 = (*a_13_1).columns[0]; + float2 param_27_1 = (*b_13_1).columns[0]; + bool _189 = compare_vec2(¶m_26_1, ¶m_27_1); + bool _200; + if (_189) + { + float2 param_28_1 = (*a_13_1).columns[1]; + float2 
param_29_1 = (*b_13_1).columns[1]; + _200 = compare_vec2(¶m_28_1, ¶m_29_1); + } + else + { + _200 = _189; + } + return _200; +} + +bool compare_ivec2( int2* a_14_1, int2* b_14_1) +{ + return all((*a_14_1) == (*b_14_1)); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _612) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local S1 s1; + __local S2 s2; + s1.a = 0u; + s1.b = (float4)(8.0f, 8.0f, 0.0f, -4.0f); + s2.a.mA.mA.mA = (spvMat4){ { (float4)(-5.0f, 9.0f, -4.0f, -6.0f), (float4)(-1.0f, -1.0f, -2.0f, 1.0f), (float4)(6.0f, 5.0f, 7.0f, -2.0f), (float4)(-4.0f, -9.0f, 8.0f, 3.0f) } }; + s2.a.mA.mA.mB = (int3)(true, false, false); + s2.a.mA.mA.mC = (int4)(true, true, true, false); + s2.a.mA.mB.mA = (int2)(true); + s2.a.mA.mC.mA = 7.0f; + s2.a.mA.mC.mB = (uint4)(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0f; + s2.b = (int3)(1, -4, 0); + s2.c.mA.mA.mA = (uint3)(4u, 9u, 1u); + s2.c.mA.mA.mB = false; + s2.c.mA.mB = (spvMat3x2){ { (float2)(3.0f, -5.0f), (float2)(-1.0f, -5.0f), (float2)(-1.0f, -9.0f) } }; + s2.c.mB = (float2)(-6.0f, -9.0f); + s2.d.mA = (int2)(true, false); + s2.d.mB.mA.mA = (spvMat2){ { (float2)(-2.0f, 3.0f), (float2)(7.0f, 2.0f) } }; + s2.d.mB.mA.mB = (int3)(false); + s2.d.mB.mA.mC = (int4)(false, false, false, true); + s2.d.mB.mB = (int3)(true, false, false); + s2.d.mC = (int2)(-9, 0); + barrier(CLK_LOCAL_MEM_FENCE); + mem_fence(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + bool allOk_1 = true; + bool _435; + if (allOk_1) + { + uint param_30_1 = 0u; + uint param_31_1 = s1.a; + _435 = compare_uint(¶m_30_1, ¶m_31_1); + } + else + { + _435 = allOk_1; + } + allOk_1 = _435; + bool _444; + if (allOk_1) + { + float4 param_32_1 = (float4)(8.0f, 8.0f, 0.0f, -4.0f); + float4 param_33_1 = s1.b; + _444 = compare_vec4(¶m_32_1, ¶m_33_1); + } + else + { + _444 = allOk_1; + } + allOk_1 = _444; + bool _453; + if (allOk_1) + { + spvMat4 param_34_1 = (spvMat4){ { (float4)(-5.0f, 
9.0f, -4.0f, -6.0f), (float4)(-1.0f, -1.0f, -2.0f, 1.0f), (float4)(6.0f, 5.0f, 7.0f, -2.0f), (float4)(-4.0f, -9.0f, 8.0f, 3.0f) } }; + spvMat4 param_35_1 = s2.a.mA.mA.mA; + _453 = compare_mat4(¶m_34_1, ¶m_35_1); + } + else + { + _453 = allOk_1; + } + allOk_1 = _453; + bool _462; + if (allOk_1) + { + int3 param_36_1 = (int3)(true, false, false); + int3 param_37_1 = s2.a.mA.mA.mB; + _462 = compare_bvec3(¶m_36_1, ¶m_37_1); + } + else + { + _462 = allOk_1; + } + allOk_1 = _462; + bool _471; + if (allOk_1) + { + int4 param_38_1 = (int4)(true, true, true, false); + int4 param_39_1 = s2.a.mA.mA.mC; + _471 = compare_bvec4(¶m_38_1, ¶m_39_1); + } + else + { + _471 = allOk_1; + } + allOk_1 = _471; + bool _480; + if (allOk_1) + { + int2 param_40_1 = (int2)(true); + int2 param_41_1 = s2.a.mA.mB.mA; + _480 = compare_bvec2(¶m_40_1, ¶m_41_1); + } + else + { + _480 = allOk_1; + } + allOk_1 = _480; + bool _489; + if (allOk_1) + { + float param_42_1 = 7.0f; + float param_43_1 = s2.a.mA.mC.mA; + _489 = compare_float(¶m_42_1, ¶m_43_1); + } + else + { + _489 = allOk_1; + } + allOk_1 = _489; + bool _498; + if (allOk_1) + { + uint4 param_44_1 = (uint4)(8u, 6u, 2u, 0u); + uint4 param_45_1 = s2.a.mA.mC.mB; + _498 = compare_uvec4(¶m_44_1, ¶m_45_1); + } + else + { + _498 = allOk_1; + } + allOk_1 = _498; + bool _507; + if (allOk_1) + { + float param_46_1 = -9.0f; + float param_47_1 = s2.a.mA.mC.mC; + _507 = compare_float(¶m_46_1, ¶m_47_1); + } + else + { + _507 = allOk_1; + } + allOk_1 = _507; + bool _516; + if (allOk_1) + { + int3 param_48_1 = (int3)(1, -4, 0); + int3 param_49_1 = s2.b; + _516 = compare_ivec3(¶m_48_1, ¶m_49_1); + } + else + { + _516 = allOk_1; + } + allOk_1 = _516; + bool _525; + if (allOk_1) + { + uint3 param_50_1 = (uint3)(4u, 9u, 1u); + uint3 param_51_1 = s2.c.mA.mA.mA; + _525 = compare_uvec3(¶m_50_1, ¶m_51_1); + } + else + { + _525 = allOk_1; + } + allOk_1 = _525; + bool _534; + if (allOk_1) + { + bool param_52_1 = false; + bool param_53_1 = s2.c.mA.mA.mB; + _534 = 
compare_bool(¶m_52_1, ¶m_53_1); + } + else + { + _534 = allOk_1; + } + allOk_1 = _534; + bool _543; + if (allOk_1) + { + spvMat3x2 param_54_1 = (spvMat3x2){ { (float2)(3.0f, -5.0f), (float2)(-1.0f, -5.0f), (float2)(-1.0f, -9.0f) } }; + spvMat3x2 param_55_1 = s2.c.mA.mB; + _543 = compare_mat3x2(¶m_54_1, ¶m_55_1); + } + else + { + _543 = allOk_1; + } + allOk_1 = _543; + bool _552; + if (allOk_1) + { + float2 param_56_1 = (float2)(-6.0f, -9.0f); + float2 param_57_1 = s2.c.mB; + _552 = compare_vec2(¶m_56_1, ¶m_57_1); + } + else + { + _552 = allOk_1; + } + allOk_1 = _552; + bool _561; + if (allOk_1) + { + int2 param_58_1 = (int2)(true, false); + int2 param_59_1 = s2.d.mA; + _561 = compare_bvec2(¶m_58_1, ¶m_59_1); + } + else + { + _561 = allOk_1; + } + allOk_1 = _561; + bool _570; + if (allOk_1) + { + spvMat2 param_60_1 = (spvMat2){ { (float2)(-2.0f, 3.0f), (float2)(7.0f, 2.0f) } }; + spvMat2 param_61_1 = s2.d.mB.mA.mA; + _570 = compare_mat2(¶m_60_1, ¶m_61_1); + } + else + { + _570 = allOk_1; + } + allOk_1 = _570; + bool _579; + if (allOk_1) + { + int3 param_62_1 = (int3)(false); + int3 param_63_1 = s2.d.mB.mA.mB; + _579 = compare_bvec3(¶m_62_1, ¶m_63_1); + } + else + { + _579 = allOk_1; + } + allOk_1 = _579; + bool _588; + if (allOk_1) + { + int4 param_64_1 = (int4)(false, false, false, true); + int4 param_65_1 = s2.d.mB.mA.mC; + _588 = compare_bvec4(¶m_64_1, ¶m_65_1); + } + else + { + _588 = allOk_1; + } + allOk_1 = _588; + bool _597; + if (allOk_1) + { + int3 param_66_1 = (int3)(true, false, false); + int3 param_67_1 = s2.d.mB.mB; + _597 = compare_bvec3(¶m_66_1, ¶m_67_1); + } + else + { + _597 = allOk_1; + } + allOk_1 = _597; + bool _606; + if (allOk_1) + { + int2 param_68_1 = (int2)(-9, 0); + int2 param_69_1 = s2.d.mC; + _606 = compare_ivec2(¶m_68_1, ¶m_69_1); + } + else + { + _606 = allOk_1; + } + allOk_1 = _606; + if (allOk_1) + { + _612[0] += as_uint(1); + } +} + diff --git a/reference/shaders-opencl/comp/shared-std450.double.comp 
b/reference/shaders-opencl/comp/shared-std450.fp64.comp similarity index 100% rename from reference/shaders-opencl/comp/shared-std450.double.comp rename to reference/shaders-opencl/comp/shared-std450.fp64.comp diff --git a/reference/shaders-opencl/comp/struct-layout.comp b/reference/shaders-opencl/comp/struct-layout.comp index eb416ee27..375cfed37 100644 --- a/reference/shaders-opencl/comp/struct-layout.comp +++ b/reference/shaders-opencl/comp/struct-layout.comp @@ -1,9 +1,11 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + struct Foo { - float4 m; + spvMat4 m; }; typedef struct Foo Foo; @@ -22,11 +24,26 @@ struct SSBO typedef struct SSBO SSBO; +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat4 spvMulMat4Mat4(spvMat4 a, spvMat4 b) +{ + spvMat4 r; + r.columns[0] = spvMulMat4Vec4(a, b.columns[0]); + r.columns[1] = spvMulMat4Vec4(a, b.columns[1]); + r.columns[2] = spvMulMat4Vec4(a, b.columns[2]); + r.columns[3] = spvMulMat4Vec4(a, b.columns[3]); + return r; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global Foo* _23, __global const Foo* _30) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; - _23[ident].m = _30[ident].m * _30[ident].m; + uint ident_1 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + _23[ident_1].m = spvMulMat4Mat4(_30[ident_1].m, _30[ident_1].m); } diff --git a/reference/shaders-opencl/comp/struct-packing.comp b/reference/shaders-opencl/comp/struct-packing.comp new file mode 100644 index 000000000..6fd919f9f --- /dev/null +++ b/reference/shaders-opencl/comp/struct-packing.comp @@ -0,0 +1,125 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } 
spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float2 columns[3]; } spvMat3x2; + +struct S0 +{ + float2 a[1]; + float b; +}; + +typedef struct S0 S0; + +struct S1 +{ + float3 a; + float b; +}; + +typedef struct S1 S1; + +struct S2 +{ + float3 a[1]; + float b; +}; + +typedef struct S2 S2; + +struct S3 +{ + float2 a; + float b; +}; + +typedef struct S3 S3; + +struct S4 +{ + float2 c; +}; + +typedef struct S4 S4; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + S4 m3s[8]; +}; + +typedef struct Content Content; + +struct SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + spvMat2 m0; + spvMat2 m1; + spvMat2x3 m2[4]; + spvMat3x2 m3; + spvMat2 m4; + spvMat2 m5[9]; + spvMat3x2 m6[4][2]; + spvMat2x3 m7; + float array[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + float array[1]; +}; + +typedef struct SSBO0 SSBO0; + +static float3 spvMulVec2Mat3x2(float2 v, spvMat3x2 m) +{ + return (float3)(dot(v, m.columns[0]), dot(v, m.columns[1]), dot(v, m.columns[2])); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO1* ssbo_430, __global SSBO0* ssbo_140) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + Content _60 = ssbo_140->content; + ssbo_430->content.m0s[0].a[0] = _60.m0s[0].a[0]; + ssbo_430->content.m0s[0].b = _60.m0s[0].b; + ssbo_430->content.m1s[0].a = _60.m1s[0].a; + ssbo_430->content.m1s[0].b = _60.m1s[0].b; + ssbo_430->content.m2s[0].a[0] = _60.m2s[0].a[0]; + ssbo_430->content.m2s[0].b = _60.m2s[0].b; + ssbo_430->content.m0.a[0] = _60.m0.a[0]; + ssbo_430->content.m0.b = _60.m0.b; + ssbo_430->content.m1.a = _60.m1.a; + ssbo_430->content.m1.b = _60.m1.b; + ssbo_430->content.m2.a[0] = _60.m2.a[0]; + ssbo_430->content.m2.b = _60.m2.b; + ssbo_430->content.m3.a = _60.m3.a; + ssbo_430->content.m3.b = _60.m3.b; 
+ ssbo_430->content.m4 = _60.m4; + ssbo_430->content.m3s[0].c = _60.m3s[0].c; + ssbo_430->content.m3s[1].c = _60.m3s[1].c; + ssbo_430->content.m3s[2].c = _60.m3s[2].c; + ssbo_430->content.m3s[3].c = _60.m3s[3].c; + ssbo_430->content.m3s[4].c = _60.m3s[4].c; + ssbo_430->content.m3s[5].c = _60.m3s[5].c; + ssbo_430->content.m3s[6].c = _60.m3s[6].c; + ssbo_430->content.m3s[7].c = _60.m3s[7].c; + ssbo_430->content.m1.a = spvMulVec2Mat3x2(ssbo_430->content.m3.a, ssbo_430->m6[1][1]); +} + diff --git a/reference/shaders-opencl/comp/struct-packing.invalid.comp b/reference/shaders-opencl/comp/struct-packing.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl/comp/torture-loop.comp b/reference/shaders-opencl/comp/torture-loop.comp index 45f32a55b..6a978b9d6 100644 --- a/reference/shaders-opencl/comp/torture-loop.comp +++ b/reference/shaders-opencl/comp/torture-loop.comp @@ -1,9 +1,11 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) +typedef struct { float4 columns[4]; } spvMat4; + struct SSBO { - float4 mvp; + spvMat4 mvp; float4 in_data[1]; }; @@ -16,6 +18,11 @@ struct SSBO2 typedef struct SSBO2 SSBO2; +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global const SSBO* _24, __global float4* _89) { @@ -43,7 +50,7 @@ __kernel void comp_main(__global const SSBO* _24, __global float4* _89) { for (uint j_1 = 0u; j_1 < 30u; j_1 += as_uint(1)) { - idat_1 = _24->mvp * idat_1; + idat_1 = spvMulMat4Vec4(_24->mvp, idat_1); } } do diff --git a/shaders-opencl/asm/comp/relaxed-block-layout.asm.comp b/shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp similarity index 100% rename from shaders-opencl/asm/comp/relaxed-block-layout.asm.comp rename to shaders-opencl/asm/comp/relaxed-block-layout.fp16.asm.comp diff --git 
a/shaders-opencl/comp/inverse.comp b/shaders-opencl/comp/inverse.comp new file mode 100644 index 000000000..03b06d646 --- /dev/null +++ b/shaders-opencl/comp/inverse.comp @@ -0,0 +1,23 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std430, binding = 0) writeonly buffer MatrixOut +{ + mat2 m2out; + mat3 m3out; + mat4 m4out; +}; + +layout(std430, binding = 1) readonly buffer MatrixIn +{ + mat2 m2in; + mat3 m3in; + mat4 m4in; +}; + +void main() +{ + m2out = inverse(m2in); + m3out = inverse(m3in); + m4out = inverse(m4in); +} diff --git a/shaders-opencl/comp/mat3-row-maj-read-write-const.comp b/shaders-opencl/comp/mat3-row-maj-read-write-const.comp new file mode 100644 index 000000000..068ad7972 --- /dev/null +++ b/shaders-opencl/comp/mat3-row-maj-read-write-const.comp @@ -0,0 +1,17 @@ +#version 450 +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + +layout(set = 0, binding = 1, std430) buffer model_t +{ + layout(row_major) mediump mat3 mtx_rm; +} model; + +void main() +{ + mat3 mtx_cm = model.mtx_rm; + mat3 mtx1 = mtx_cm * mat3(vec3(4.0, -3.0, 1.0), vec3(-7.0, 7.0, -7.0), vec3(-5.0, 6.0, -8.0)); + if (mtx1[0][0] != 0.0) + { + model.mtx_rm = mat3(vec3(-5.0, -3.0, -5.0), vec3(-2.0, 2.0, -5.0), vec3(6.0, 3.0, -8.0)); + } +} diff --git a/shaders-opencl/comp/mat3.comp b/shaders-opencl/comp/mat3.comp new file mode 100644 index 000000000..7c5bb1e4f --- /dev/null +++ b/shaders-opencl/comp/mat3.comp @@ -0,0 +1,14 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + mat3 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + out_data[ident] = mat3(vec3(10.0), vec3(20.0), vec3(40.0)); +} + diff --git a/shaders-opencl/comp/rmw-matrix.comp b/shaders-opencl/comp/rmw-matrix.comp new file mode 100644 index 000000000..c158ab4dd --- /dev/null +++ b/shaders-opencl/comp/rmw-matrix.comp @@ -0,0 +1,20 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer 
SSBO +{ + float a; + vec4 b; + mat4 c; + + float a1; + vec4 b1; + mat4 c1; +}; + +void main() +{ + a *= a1; + b *= b1; + c *= c1; +} diff --git a/shaders-opencl/comp/shared-matrix-array-of-array.comp b/shaders-opencl/comp/shared-matrix-array-of-array.comp new file mode 100644 index 000000000..3bbd4c0f0 --- /dev/null +++ b/shaders-opencl/comp/shared-matrix-array-of-array.comp @@ -0,0 +1,65 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump mat4x3 a[2]; + lowp float b; + lowp vec2 c[3]; +}; +struct S2 { + highp ivec4 a; + bool b[3][1][3]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_ivec4 (highp ivec4 a, highp ivec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void main (void) { + s1.a[0] = mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0); + s1.a[1] = mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0); + s1.b = 7.0; + s1.c[0] = vec2(-5.0, -4.0); + s1.c[1] = vec2(3.0, -5.0); + s1.c[2] = vec2(-3.0, -1.0); + s2.a = ivec4(1, 0, -3, 1); + s2.b[0][0][0] = true; + s2.b[0][0][1] = false; + s2.b[0][0][2] = false; + s2.b[1][0][0] = true; + s2.b[1][0][1] = false; + s2.b[1][0][2] = true; + s2.b[2][0][0] = false; + s2.b[2][0][1] = true; + s2.b[2][0][2] = true; + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat4x3(mat4x3(0.0, 2.0, -8.0, 6.0, 7.0, 5.0, -6.0, 1.0, 9.0, -4.0, -3.0, 4.0), s1.a[0]); 
+ allOk = allOk && compare_mat4x3(mat4x3(4.0, 9.0, -9.0, -8.0, -9.0, 8.0, 0.0, 4.0, -4.0, 7.0, 2.0, -1.0), s1.a[1]); + allOk = allOk && compare_float(7.0, s1.b); + allOk = allOk && compare_vec2(vec2(-5.0, -4.0), s1.c[0]); + allOk = allOk && compare_vec2(vec2(3.0, -5.0), s1.c[1]); + allOk = allOk && compare_vec2(vec2(-3.0, -1.0), s1.c[2]); + allOk = allOk && compare_ivec4(ivec4(1, 0, -3, 1), s2.a); + allOk = allOk && compare_bool(true, s2.b[0][0][0]); + allOk = allOk && compare_bool(false, s2.b[0][0][1]); + allOk = allOk && compare_bool(false, s2.b[0][0][2]); + allOk = allOk && compare_bool(true, s2.b[1][0][0]); + allOk = allOk && compare_bool(false, s2.b[1][0][1]); + allOk = allOk && compare_bool(true, s2.b[1][0][2]); + allOk = allOk && compare_bool(false, s2.b[2][0][0]); + allOk = allOk && compare_bool(true, s2.b[2][0][1]); + allOk = allOk && compare_bool(true, s2.b[2][0][2]); + if (allOk) + passed++; + +} diff --git a/shaders-opencl/comp/shared-matrix-cast.comp b/shaders-opencl/comp/shared-matrix-cast.comp new file mode 100644 index 000000000..7e46fed7a --- /dev/null +++ b/shaders-opencl/comp/shared-matrix-cast.comp @@ -0,0 +1,33 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct S1 { + mediump vec4 a; + highp mat3x2 b; + bvec4 c; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a = vec4(1.0, -5.0, -9.0, -5.0); + s1.b = mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 
7.0); + s1.c = bvec4(false, true, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_vec4(vec4(1.0, -5.0, -9.0, -5.0), s1.a); + allOk = allOk && compare_mat3x2(mat3x2(1.0, -7.0, 1.0, 2.0, 8.0, 7.0), s1.b); + allOk = allOk && compare_bvec4(bvec4(false, true, false, false), s1.c); + if (allOk) + passed++; + +} diff --git a/shaders-opencl/comp/shared-matrix-nested-struct-array.comp b/shaders-opencl/comp/shared-matrix-nested-struct-array.comp new file mode 100644 index 000000000..59ab24d84 --- /dev/null +++ b/shaders-opencl/comp/shared-matrix-nested-struct-array.comp @@ -0,0 +1,87 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + mediump mat2x3 mA; +}; +struct sB +{ + mediump mat2 mA; + mediump mat3x2 mB; + highp uvec3 mC; +}; +struct sC +{ + sA mA; + sB mB; +}; +struct sD +{ + sC mA; +}; +struct sE +{ + lowp mat3x2 mA; + lowp mat4x3 mB; +}; +struct sF +{ + sE mA; +}; +struct sG +{ + sF mA; +}; +struct sH +{ + bvec3 mA[2]; +}; +struct S1 { + sD a; + sG b; + sH c[2]; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec3 (highp vec3 a, highp vec3 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat2x3 (highp mat2x3 a, highp mat2x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4x3 (highp mat4x3 a, highp mat4x3 b){ return compare_vec3(a[0], b[0])&&compare_vec3(a[1], b[1])&&compare_vec3(a[2], b[2])&&compare_vec3(a[3], b[3]); } +bool compare_uvec3 (highp 
uvec3 a, highp uvec3 b) { return a == b; } +bool compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } + +shared S1 s1; + +void main (void) { + s1.a.mA.mA.mA = mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0); + s1.a.mA.mB.mA = mat2(9.0, -4.0, -6.0, -1.0); + s1.a.mA.mB.mB = mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0); + s1.a.mA.mB.mC = uvec3(3u, 1u, 5u); + s1.b.mA.mA.mA = mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0); + s1.b.mA.mA.mB = mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0); + s1.c[0].mA[0] = bvec3(true, false, false); + s1.c[0].mA[1] = bvec3(true, false, false); + s1.c[1].mA[0] = bvec3(false, false, false); + s1.c[1].mA[1] = bvec3(false, false, false); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_mat2x3(mat2x3(6.0, 8.0, 8.0, 0.0, -4.0, -5.0), s1.a.mA.mA.mA); + allOk = allOk && compare_mat2(mat2(9.0, -4.0, -6.0, -1.0), s1.a.mA.mB.mA); + allOk = allOk && compare_mat3x2(mat3x2(-1.0, -2.0, 1.0, 6.0, 5.0, 7.0), s1.a.mA.mB.mB); + allOk = allOk && compare_uvec3(uvec3(3u, 1u, 5u), s1.a.mA.mB.mC); + allOk = allOk && compare_mat3x2(mat3x2(8.0, 3.0, 0.0, 2.0, 1.0, 8.0), s1.b.mA.mA.mA); + allOk = allOk && compare_mat4x3(mat4x3(0.0, 9.0, -1.0, -1.0, -7.0, 7.0, -4.0, -3.0, 1.0, -4.0, -9.0, 1.0), s1.b.mA.mA.mB); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[0]); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s1.c[0].mA[1]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[0]); + allOk = allOk && compare_bvec3(bvec3(false, false, false), s1.c[1].mA[1]); + if (allOk) + passed++; + +} diff --git a/shaders-opencl/comp/shared-matrix-nested-struct.comp b/shaders-opencl/comp/shared-matrix-nested-struct.comp new file mode 100644 index 000000000..c481f54a8 --- /dev/null +++ b/shaders-opencl/comp/shared-matrix-nested-struct.comp @@ -0,0 +1,141 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer block { highp uint passed; }; +struct sA +{ + highp mat4 
mA; + bvec3 mB; + bvec4 mC; +}; +struct sB +{ + bvec2 mA; +}; +struct sC +{ + highp float mA; + mediump uvec4 mB; + mediump float mC; +}; +struct sD +{ + sA mA; + sB mB; + sC mC; +}; +struct sE +{ + sD mA; +}; +struct sF +{ + lowp uvec3 mA; + bool mB; +}; +struct sG +{ + sF mA; + highp mat3x2 mB; +}; +struct sH +{ + sG mA; + mediump vec2 mB; +}; +struct sI +{ + mediump mat2 mA; + bvec3 mB; + bvec4 mC; +}; +struct sJ +{ + sI mA; + bvec3 mB; +}; +struct sK +{ + bvec2 mA; + sJ mB; + mediump ivec2 mC; +}; +struct S1 { + lowp uint a; + mediump vec4 b; +}; +struct S2 { + sE a; + highp ivec3 b; + sH c; + sK d; +}; + +bool compare_float (highp float a, highp float b) { return abs(a - b) < 0.05; } +bool compare_vec2 (highp vec2 a, highp vec2 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y); } +bool compare_vec4 (highp vec4 a, highp vec4 b) { return compare_float(a.x, b.x)&&compare_float(a.y, b.y)&&compare_float(a.z, b.z)&&compare_float(a.w, b.w); } +bool compare_mat2 (highp mat2 a, highp mat2 b) { return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1]); } +bool compare_mat3x2 (highp mat3x2 a, highp mat3x2 b){ return compare_vec2(a[0], b[0])&&compare_vec2(a[1], b[1])&&compare_vec2(a[2], b[2]); } +bool compare_mat4 (highp mat4 a, highp mat4 b) { return compare_vec4(a[0], b[0])&&compare_vec4(a[1], b[1])&&compare_vec4(a[2], b[2])&&compare_vec4(a[3], b[3]); } +bool compare_ivec2 (highp ivec2 a, highp ivec2 b) { return a == b; } +bool compare_ivec3 (highp ivec3 a, highp ivec3 b) { return a == b; } +bool compare_uint (highp uint a, highp uint b) { return a == b; } +bool compare_uvec3 (highp uvec3 a, highp uvec3 b) { return a == b; } +bool compare_uvec4 (highp uvec4 a, highp uvec4 b) { return a == b; } +bool compare_bool (bool a, bool b) { return a == b; } +bool compare_bvec2 (bvec2 a, bvec2 b) { return a == b; } +bool compare_bvec3 (bvec3 a, bvec3 b) { return a == b; } +bool compare_bvec4 (bvec4 a, bvec4 b) { return a == b; } + +shared S1 s1; +shared S2 s2; + +void 
main (void) { + s1.a = 0u; + s1.b = vec4(8.0, 8.0, 0.0, -4.0); + s2.a.mA.mA.mA = mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0); + s2.a.mA.mA.mB = bvec3(true, false, false); + s2.a.mA.mA.mC = bvec4(true, true, true, false); + s2.a.mA.mB.mA = bvec2(true, true); + s2.a.mA.mC.mA = 7.0; + s2.a.mA.mC.mB = uvec4(8u, 6u, 2u, 0u); + s2.a.mA.mC.mC = -9.0; + s2.b = ivec3(1, -4, 0); + s2.c.mA.mA.mA = uvec3(4u, 9u, 1u); + s2.c.mA.mA.mB = false; + s2.c.mA.mB = mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0); + s2.c.mB = vec2(-6.0, -9.0); + s2.d.mA = bvec2(true, false); + s2.d.mB.mA.mA = mat2(-2.0, 3.0, 7.0, 2.0); + s2.d.mB.mA.mB = bvec3(false, false, false); + s2.d.mB.mA.mC = bvec4(false, false, false, true); + s2.d.mB.mB = bvec3(true, false, false); + s2.d.mC = ivec2(-9, 0); + + barrier(); + memoryBarrier(); + bool allOk = true; + allOk = allOk && compare_uint(0u, s1.a); + allOk = allOk && compare_vec4(vec4(8.0, 8.0, 0.0, -4.0), s1.b); + allOk = allOk && compare_mat4(mat4(-5.0, 9.0, -4.0, -6.0, -1.0, -1.0, -2.0, 1.0, 6.0, 5.0, 7.0, -2.0, -4.0, -9.0, 8.0, 3.0), s2.a.mA.mA.mA); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.a.mA.mA.mB); + allOk = allOk && compare_bvec4(bvec4(true, true, true, false), s2.a.mA.mA.mC); + allOk = allOk && compare_bvec2(bvec2(true, true), s2.a.mA.mB.mA); + allOk = allOk && compare_float(7.0, s2.a.mA.mC.mA); + allOk = allOk && compare_uvec4(uvec4(8u, 6u, 2u, 0u), s2.a.mA.mC.mB); + allOk = allOk && compare_float(-9.0, s2.a.mA.mC.mC); + allOk = allOk && compare_ivec3(ivec3(1, -4, 0), s2.b); + allOk = allOk && compare_uvec3(uvec3(4u, 9u, 1u), s2.c.mA.mA.mA); + allOk = allOk && compare_bool(false, s2.c.mA.mA.mB); + allOk = allOk && compare_mat3x2(mat3x2(3.0, -5.0, -1.0, -5.0, -1.0, -9.0), s2.c.mA.mB); + allOk = allOk && compare_vec2(vec2(-6.0, -9.0), s2.c.mB); + allOk = allOk && compare_bvec2(bvec2(true, false), s2.d.mA); + allOk = allOk && compare_mat2(mat2(-2.0, 3.0, 7.0, 2.0), s2.d.mB.mA.mA); + 
allOk = allOk && compare_bvec3(bvec3(false, false, false), s2.d.mB.mA.mB); + allOk = allOk && compare_bvec4(bvec4(false, false, false, true), s2.d.mB.mA.mC); + allOk = allOk && compare_bvec3(bvec3(true, false, false), s2.d.mB.mB); + allOk = allOk && compare_ivec2(ivec2(-9, 0), s2.d.mC); + if (allOk) + passed++; + +} diff --git a/shaders-opencl/comp/shared-std450.double.comp b/shaders-opencl/comp/shared-std450.fp64.comp similarity index 100% rename from shaders-opencl/comp/shared-std450.double.comp rename to shaders-opencl/comp/shared-std450.fp64.comp diff --git a/shaders-opencl/comp/struct-packing.invalid.comp b/shaders-opencl/comp/struct-packing.comp similarity index 100% rename from shaders-opencl/comp/struct-packing.invalid.comp rename to shaders-opencl/comp/struct-packing.comp diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index f49366ac4..1146f92d0 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -812,6 +812,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_OPENCL_VERSION: options->opencl.opencl_version = value; break; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP16: + options->opencl.enable_fp16 = value != 0; + break; case SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP64: options->opencl.enable_fp64 = value != 0; break; @@ -821,8 +824,14 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS: options->opencl.enable_subgroups = value != 0; break; - case SPVC_COMPILER_OPTION_OPENCL_ENABLE_SHUFFLE: - options->opencl.enable_shuffle = value != 0; + case SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS_ALL: + options->opencl.enable_subgroups_all = value != 0; + break; + case SPVC_COMPILER_OPTION_OPENCL_EMULATE_SUBGROUPS: + options->opencl.emulate_subgroups = value != 0; + break; + case SPVC_COMPILER_OPTION_OPENCL_FIXED_SUBGROUP_SIZE: + options->opencl.fixed_subgroup_size = value; break; #endif diff --git a/spirv_cross_c.h 
b/spirv_cross_c.h index c59c299d0..e4d37ce46 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -759,10 +759,13 @@ extern "C" SPVC_COMPILER_OPTION_HLSL_USER_SEMANTIC = 94 | SPVC_COMPILER_OPTION_HLSL_BIT, SPVC_COMPILER_OPTION_OPENCL_VERSION = 95 | SPVC_COMPILER_OPTION_OPENCL_BIT, - SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP64 = 96 | SPVC_COMPILER_OPTION_OPENCL_BIT, - SPVC_COMPILER_OPTION_OPENCL_ENABLE_64BIT_ATOMICS = 97 | SPVC_COMPILER_OPTION_OPENCL_BIT, - SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS = 98 | SPVC_COMPILER_OPTION_OPENCL_BIT, - SPVC_COMPILER_OPTION_OPENCL_ENABLE_SHUFFLE = 99 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP16 = 96 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_FP64 = 97 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_64BIT_ATOMICS = 98 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS = 99 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS_ALL = 100 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_EMULATE_SUBGROUPS = 101 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_FIXED_SUBGROUP_SIZE = 102 | SPVC_COMPILER_OPTION_OPENCL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 10accf077..66ab8c560 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -25,12 +25,12 @@ #include "GLSL.std.450.h" #include "spirv_common.hpp" #include +#include #include #include #include #include #include -#include #ifndef _WIN32 #include @@ -202,7 +202,7 @@ static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard return packing; } } -} +} // namespace SPIRV_CROSS_NAMESPACE void CompilerGLSL::init() { @@ -352,7 +352,8 @@ void CompilerGLSL::reset(uint32_t iteration_count) // and it is not practical with the current architecture // to resolve everything up front. 
if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) - SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); + SPIRV_CROSS_THROW( + "Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); // We do some speculative optimizations which should pretty much always work out, // but just in case the SPIR-V is rather weird, recompile until it's happy. @@ -376,10 +377,12 @@ void CompilerGLSL::reset(uint32_t iteration_count) reset_name_caches(); - ir.for_each_typed_id([&](uint32_t, SPIRFunction &func) { - func.active = false; - func.flush_undeclared = true; - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRFunction &func) + { + func.active = false; + func.flush_undeclared = true; + }); ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); }); @@ -427,54 +430,54 @@ void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const { return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), - [&](const std::pair &elem) { - return elem.first == location; - }) != end(inout_color_attachments); + [&](const std::pair &elem) + { return elem.first == location; }) != end(inout_color_attachments); } bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const { return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), - [&](const std::pair &elem) { - return elem.first == location && !elem.second; - }) != end(inout_color_attachments); + [&](const std::pair &elem) + { return elem.first == location && !elem.second; }) != end(inout_color_attachments); } void CompilerGLSL::find_static_extensions() { - ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { - if (type.basetype == SPIRType::Double) - { - if (options.es) - SPIRV_CROSS_THROW("FP64 
not supported in ES profile."); - if (!options.es && options.version < 400) - require_extension_internal("GL_ARB_gpu_shader_fp64"); - } - else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) - { - if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. - SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); - require_extension_internal("GL_ARB_gpu_shader_int64"); - } - else if (type.basetype == SPIRType::Half) - { - require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); - if (options.vulkan_semantics) - require_extension_internal("GL_EXT_shader_16bit_storage"); - } - else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) - { - require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); - if (options.vulkan_semantics) - require_extension_internal("GL_EXT_shader_8bit_storage"); - } - else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) - { - require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); - if (options.vulkan_semantics) - require_extension_internal("GL_EXT_shader_16bit_storage"); - } - }); + ir.for_each_typed_id( + [&](uint32_t, const SPIRType &type) + { + if (type.basetype == SPIRType::Double) + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + } + else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) + { + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. 
+ SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); + } + else if (type.basetype == SPIRType::Half) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_8bit_storage"); + } + else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + }); auto &execution = get_entry_point(); switch (execution.model) @@ -716,8 +719,8 @@ void CompilerGLSL::find_static_extensions() void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed) { - uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ? - required_polyfills_relaxed : required_polyfills; + uint32_t &polyfills = + (relaxed && (options.es || options.vulkan_semantics)) ? required_polyfills_relaxed : required_polyfills; if ((polyfills & polyfill) == 0) { @@ -729,15 +732,17 @@ void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed) void CompilerGLSL::ray_tracing_khr_fixup_locations() { uint32_t location = 0; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - // Incoming payload storage can also be used for tracing. 
- if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && - var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) - return; - if (is_hidden_variable(var)) - return; - set_decoration(var.self, DecorationLocation, location++); - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + // Incoming payload storage can also be used for tracing. + if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && + var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) + return; + if (is_hidden_variable(var)) + return; + set_decoration(var.self, DecorationLocation, location++); + }); } string CompilerGLSL::compile() @@ -757,7 +762,7 @@ string CompilerGLSL::compile() backend.workgroup_size_is_hidden = true; backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; backend.support_precise_qualifier = - (!options.es && options.version >= 400) || (options.es && options.version >= 320); + (!options.es && options.version >= 400) || (options.es && options.version >= 320); backend.constant_null_initializer = "{ }"; backend.requires_matching_array_initializer = true; @@ -2300,8 +2305,7 @@ string CompilerGLSL::layout_for_variable(const SPIRVariable &var) return res; } -string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, - bool support_std430_without_scalar_layout, +string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout, bool support_enhanced_layouts) { if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) @@ -2313,8 +2317,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } - else if (support_std430_without_scalar_layout && - support_enhanced_layouts && + else if 
(support_std430_without_scalar_layout && support_enhanced_layouts && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { if (options.es && !options.vulkan_semantics) @@ -2326,8 +2329,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } - else if (support_enhanced_layouts && - buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) + else if (support_enhanced_layouts && buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) { // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. @@ -2341,8 +2343,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } - else if (options.vulkan_semantics && - support_enhanced_layouts && + else if (options.vulkan_semantics && support_enhanced_layouts && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); @@ -2356,8 +2357,7 @@ string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } - else if (!support_std430_without_scalar_layout && options.vulkan_semantics && - support_enhanced_layouts && + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && support_enhanced_layouts && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. 
@@ -2532,7 +2532,7 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de } else if (is_array(get_pointee_type(type))) { - SPIRType wrap_type{OpTypeStruct}; + SPIRType wrap_type{ OpTypeStruct }; wrap_type.self = ir.increase_bound_by(1); wrap_type.member_types.push_back(get_pointee_type_id(type_id)); ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0); @@ -2540,7 +2540,8 @@ void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_de } if (alignment) - statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name); + statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, + ") buffer ", buffer_name); else statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name); } @@ -2656,7 +2657,7 @@ void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) SPIRType::BaseType basic_type; if (get_common_basic_type(type, basic_type)) { - SPIRType tmp { OpTypeVector }; + SPIRType tmp{ OpTypeVector }; tmp.basetype = basic_type; tmp.vecsize = 4; if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) @@ -2831,8 +2832,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); - if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && - !options.es && options.version < 410) + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && !options.es && options.version < 410) { require_extension_internal("GL_ARB_vertex_attrib_64bit"); } @@ -3072,42 +3072,48 @@ void CompilerGLSL::emit_entry_point_declarations() void CompilerGLSL::replace_illegal_names(const unordered_set &keywords) { - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (is_hidden_variable(var)) - return; - - auto *meta = ir.find_meta(var.self); - if (!meta) - return; - - auto &m = 
meta->decoration; - if (keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - }); - - ir.for_each_typed_id([&](uint32_t, const SPIRFunction &func) { - auto *meta = ir.find_meta(func.self); - if (!meta) - return; - - auto &m = meta->decoration; - if (keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - }); - - ir.for_each_typed_id([&](uint32_t, const SPIRType &type) { - auto *meta = ir.find_meta(type.self); - if (!meta) - return; - - auto &m = meta->decoration; - if (keywords.find(m.alias) != end(keywords)) - m.alias = join("_", m.alias); - - for (auto &memb : meta->members) - if (keywords.find(memb.alias) != end(keywords)) - memb.alias = join("_", memb.alias); - }); + ir.for_each_typed_id( + [&](uint32_t, const SPIRVariable &var) + { + if (is_hidden_variable(var)) + return; + + auto *meta = ir.find_meta(var.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id( + [&](uint32_t, const SPIRFunction &func) + { + auto *meta = ir.find_meta(func.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id( + [&](uint32_t, const SPIRType &type) + { + auto *meta = ir.find_meta(type.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + + for (auto &memb : meta->members) + if (keywords.find(memb.alias) != end(keywords)) + memb.alias = join("_", memb.alias); + }); } void CompilerGLSL::replace_illegal_names() @@ -3207,12 +3213,15 @@ void CompilerGLSL::replace_fragment_output(SPIRVariable &var) void CompilerGLSL::replace_fragment_outputs() { - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = 
this->get(var.basetype); - if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) - replace_fragment_output(var); - }); + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && + var.storage == StorageClassOutput) + replace_fragment_output(var); + }); } string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) @@ -3274,21 +3283,23 @@ void CompilerGLSL::fixup_image_load_store_access() if (!options.enable_storage_image_qualifier_deduction) return; - ir.for_each_typed_id([&](uint32_t var, const SPIRVariable &) { - auto &vartype = expression_type(var); - if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) - { - // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. - // Solve this by making the image access as restricted as possible and loosen up if we need to. - // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. - - if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) - { - set_decoration(var, DecorationNonWritable); - set_decoration(var, DecorationNonReadable); - } - } - }); + ir.for_each_typed_id( + [&](uint32_t var, const SPIRVariable &) + { + auto &vartype = expression_type(var); + if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) + { + // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. + // Solve this by making the image access as restricted as possible and loosen up if we need to. + // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. 
+ + if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) + { + set_decoration(var, DecorationNonWritable); + set_decoration(var, DecorationNonReadable); + } + } + }); } static bool is_block_builtin(BuiltIn builtin) @@ -3305,34 +3316,36 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) return false; bool should_force = false; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (should_force) - return; - - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - if (var.storage == storage && block && is_builtin_variable(var)) - { - uint32_t member_count = uint32_t(type.member_types.size()); - for (uint32_t i = 0; i < member_count; i++) - { - if (has_member_decoration(type.self, i, DecorationBuiltIn) && - is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && - has_member_decoration(type.self, i, DecorationOffset)) - { - should_force = true; - } - } - } - else if (var.storage == storage && !block && is_builtin_variable(var)) - { - if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && - has_decoration(var.self, DecorationOffset)) - { - should_force = true; - } - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + if (should_force) + return; + + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn) && + is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && + has_member_decoration(type.self, i, DecorationOffset)) + { + should_force = true; + } + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + if 
(is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && + has_decoration(var.self, DecorationOffset)) + { + should_force = true; + } + } + }); // If we're declaring clip/cull planes with control points we need to force block declaration. if ((get_execution_model() == ExecutionModelTessellationControl || @@ -3351,51 +3364,53 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) { - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && - is_builtin_variable(var)) - { - if (model != ExecutionModelMeshEXT) - { - // Make sure the array has a supported name in the code. - if (var.storage == StorageClassOutput) - set_name(var.self, "gl_out"); - else if (var.storage == StorageClassInput) - set_name(var.self, "gl_in"); - } - else - { - auto flags = get_buffer_block_flags(var.self); - if (flags.get(DecorationPerPrimitiveEXT)) - { - set_name(var.self, "gl_MeshPrimitivesEXT"); - set_name(type.self, "gl_MeshPerPrimitiveEXT"); - } - else - { - set_name(var.self, "gl_MeshVerticesEXT"); - set_name(type.self, "gl_MeshPerVertexEXT"); - } - } - } - - if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) - { - auto *m = ir.find_meta(var.self); - if (m && m->decoration.builtin) - { - auto builtin_type = m->decoration.builtin_type; - if (builtin_type == BuiltInPrimitivePointIndicesEXT) - set_name(var.self, "gl_PrimitivePointIndicesEXT"); - else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) - set_name(var.self, "gl_PrimitiveLineIndicesEXT"); - else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) - set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); - } - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = 
this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && + is_builtin_variable(var)) + { + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. + if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + { + set_name(var.self, "gl_MeshPrimitivesEXT"); + set_name(type.self, "gl_MeshPerPrimitiveEXT"); + } + else + { + set_name(var.self, "gl_MeshVerticesEXT"); + set_name(type.self, "gl_MeshPerVertexEXT"); + } + } + } + + if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) + { + auto *m = ir.find_meta(var.self); + if (m && m->decoration.builtin) + { + auto builtin_type = m->decoration.builtin_type; + if (builtin_type == BuiltInPrimitivePointIndicesEXT) + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); + } + } + }); } void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) @@ -3416,121 +3431,124 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; std::unordered_map builtin_xfb_offsets; - const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { - return builtin == BuiltInPosition || builtin == BuiltInPointSize || - builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool + { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || 
builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance; }; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - Bitset builtins; - - if (var.storage == storage && block && is_builtin_variable(var)) - { - uint32_t index = 0; - for (auto &m : ir.meta[type.self].members) - { - if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) - { - builtins.set(m.builtin_type); - if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); - else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); - - if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) - { - have_any_xfb_offset = true; - builtin_xfb_offsets[m.builtin_type] = m.offset; - } - - if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) - { - uint32_t stream = m.stream; - if (have_geom_stream && geom_stream != stream) - SPIRV_CROSS_THROW("IO block member Stream mismatch."); - have_geom_stream = true; - geom_stream = stream; - } - } - index++; - } - - if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && - has_decoration(var.self, DecorationXfbStride)) - { - uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer); - uint32_t stride = get_decoration(var.self, DecorationXfbStride); - if (have_xfb_buffer_stride && buffer_index != xfb_buffer) - SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); - if (have_xfb_buffer_stride && stride != xfb_stride) - SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); - have_xfb_buffer_stride = true; - xfb_buffer = buffer_index; - xfb_stride = stride; - } - - if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) - { - uint32_t stream = get_decoration(var.self, DecorationStream); - if 
(have_geom_stream && geom_stream != stream) - SPIRV_CROSS_THROW("IO block member Stream mismatch."); - have_geom_stream = true; - geom_stream = stream; - } - } - else if (var.storage == storage && !block && is_builtin_variable(var)) - { - // While we're at it, collect all declared global builtins (HLSL mostly ...). - auto &m = ir.meta[var.self].decoration; - if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) - { - // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type - // for correct result. - global_builtins.set(m.builtin_type); - if (m.builtin_type == BuiltInCullDistance) - cull_distance_size = to_array_size_literal(type, 0); - else if (m.builtin_type == BuiltInClipDistance) - clip_distance_size = to_array_size_literal(type, 0); - - if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && - m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) - { - have_any_xfb_offset = true; - builtin_xfb_offsets[m.builtin_type] = m.offset; - uint32_t buffer_index = m.xfb_buffer; - uint32_t stride = m.xfb_stride; - if (have_xfb_buffer_stride && buffer_index != xfb_buffer) - SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); - if (have_xfb_buffer_stride && stride != xfb_stride) - SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); - have_xfb_buffer_stride = true; - xfb_buffer = buffer_index; - xfb_stride = stride; - } - - if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) - { - uint32_t stream = get_decoration(var.self, DecorationStream); - if (have_geom_stream && geom_stream != stream) - SPIRV_CROSS_THROW("IO block member Stream mismatch."); - have_geom_stream = true; - geom_stream = stream; - } - } - } - - if (builtins.empty()) - return; - - if (emitted_block) - SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); - - emitted_builtins = builtins; - emitted_block = true; - block_var = &var; - }); + 
ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + Bitset builtins; + + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t index = 0; + for (auto &m : ir.meta[type.self].members) + { + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) + { + builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(this->get(type.member_types[index])); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(this->get(type.member_types[index])); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + index++; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && + has_decoration(var.self, DecorationXfbStride)) + { + uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer); + uint32_t stride = get_decoration(var.self, DecorationXfbStride); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block 
member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + // While we're at it, collect all declared global builtins (HLSL mostly ...). + auto &m = ir.meta[var.self].decoration; + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) + { + // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type + // for correct result. + global_builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(type, 0); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(type, 0); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && + m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + uint32_t buffer_index = m.xfb_buffer; + uint32_t stride = m.xfb_stride; + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + } + + if (builtins.empty()) + return; + + if (emitted_block) + SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); + + emitted_builtins = builtins; + emitted_block = true; + block_var = &var; + }); global_builtins = Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << 
BuiltInPointSize) | @@ -3754,27 +3772,28 @@ void CompilerGLSL::emit_resources() if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) { // Output buffer reference block forward declarations. - ir.for_each_typed_id([&](uint32_t id, SPIRType &type) - { - if (is_physical_pointer(type)) - { - bool emit_type = true; - if (!is_physical_pointer_to_buffer_block(type)) - { - // Only forward-declare if we intend to emit it in the non_block_pointer types. - // Otherwise, these are just "benign" pointer types that exist as a result of access chains. - emit_type = std::find(physical_storage_non_block_pointer_types.begin(), - physical_storage_non_block_pointer_types.end(), - id) != physical_storage_non_block_pointer_types.end(); - } - - if (emit_type) - { - emit_buffer_reference_block(id, true); - emitted = true; - } - } - }); + ir.for_each_typed_id( + [&](uint32_t id, SPIRType &type) + { + if (is_physical_pointer(type)) + { + bool emit_type = true; + if (!is_physical_pointer_to_buffer_block(type)) + { + // Only forward-declare if we intend to emit it in the non_block_pointer types. + // Otherwise, these are just "benign" pointer types that exist as a result of access chains. 
+ emit_type = std::find(physical_storage_non_block_pointer_types.begin(), + physical_storage_non_block_pointer_types.end(), + id) != physical_storage_non_block_pointer_types.end(); + } + + if (emit_type) + { + emit_buffer_reference_block(id, true); + emitted = true; + } + } + }); } if (emitted) @@ -3897,66 +3916,74 @@ void CompilerGLSL::emit_resources() for (auto type : physical_storage_non_block_pointer_types) emit_buffer_reference_block(type, false); - ir.for_each_typed_id([&](uint32_t id, SPIRType &type) { - if (is_physical_pointer_to_buffer_block(type)) - emit_buffer_reference_block(id, false); - }); + ir.for_each_typed_id( + [&](uint32_t id, SPIRType &type) + { + if (is_physical_pointer_to_buffer_block(type)) + emit_buffer_reference_block(id, false); + }); } // Output UBOs and SSBOs - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || - type.storage == StorageClassShaderRecordBufferKHR; - bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - - if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && - has_block_flags) - { - emit_buffer_block(var); - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = this->get(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || + type.storage == StorageClassShaderRecordBufferKHR; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + 
emit_buffer_block(var); + } + }); // Output push constant blocks - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && - !is_hidden_variable(var)) - { - emit_push_constant_block(var); - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = this->get(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + } + }); bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; // Output Uniform Constants (values, samplers, images, etc). - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - // If we're remapping separate samplers and images, only emit the combined samplers. - if (skip_separate_image_sampler) - { - // Sampler buffers are always used without a sampler, and they will also work in regular GL. 
- bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; - bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; - bool separate_sampler = type.basetype == SPIRType::Sampler; - if (!sampler_buffer && (separate_image || separate_sampler)) - return; - } - - if (var.storage != StorageClassFunction && type.pointer && - (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || - type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || - type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || - type.storage == StorageClassHitAttributeKHR) && - !is_hidden_variable(var)) - { - emit_uniform(var); - emitted = true; - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = this->get(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular GL. 
+ bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || + type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || + type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || + type.storage == StorageClassHitAttributeKHR) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + }); if (emitted) statement(""); @@ -3965,71 +3992,73 @@ void CompilerGLSL::emit_resources() bool emitted_base_instance = false; // Output in/out interfaces. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - bool is_hidden = is_hidden_variable(var); - - // Unused output I/O variables might still be required to implement framebuffer fetch. 
- if (var.storage == StorageClassOutput && !is_legacy() && - location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) - { - is_hidden = false; - } - - if (var.storage != StorageClassFunction && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && - interface_variable_exists_in_entry_point(var.self) && !is_hidden) - { - if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && - type.array.size() == 1) - { - SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); - } - emit_interface_block(var); - emitted = true; - } - else if (is_builtin_variable(var)) - { - auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); - // For gl_InstanceIndex emulation on GLES, the API user needs to - // supply this uniform. - - // The draw parameter extension is soft-enabled on GL with some fallbacks. - if (!options.vulkan_semantics) - { - if (!emitted_base_instance && - ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || - (builtin == BuiltInBaseInstance))) - { - statement("#ifdef GL_ARB_shader_draw_parameters"); - statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); - statement("#else"); - // A crude, but simple workaround which should be good enough for non-indirect draws. - statement("uniform int SPIRV_Cross_BaseInstance;"); - statement("#endif"); - emitted = true; - emitted_base_instance = true; - } - else if (builtin == BuiltInBaseVertex) - { - statement("#ifdef GL_ARB_shader_draw_parameters"); - statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); - statement("#else"); - // A crude, but simple workaround which should be good enough for non-indirect draws. - statement("uniform int SPIRV_Cross_BaseVertex;"); - statement("#endif"); - } - else if (builtin == BuiltInDrawIndex) - { - statement("#ifndef GL_ARB_shader_draw_parameters"); - // Cannot really be worked around. 
- statement("#error GL_ARB_shader_draw_parameters is not supported."); - statement("#endif"); - } - } - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &type = this->get(var.basetype); + + bool is_hidden = is_hidden_variable(var); + + // Unused output I/O variables might still be required to implement framebuffer fetch. + if (var.storage == StorageClassOutput && !is_legacy() && + location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) + { + is_hidden = false; + } + + if (var.storage != StorageClassFunction && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self) && !is_hidden) + { + if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && + type.array.size() == 1) + { + SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); + } + emit_interface_block(var); + emitted = true; + } + else if (is_builtin_variable(var)) + { + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + // For gl_InstanceIndex emulation on GLES, the API user needs to + // supply this uniform. + + // The draw parameter extension is soft-enabled on GL with some fallbacks. + if (!options.vulkan_semantics) + { + if (!emitted_base_instance && + ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || + (builtin == BuiltInBaseInstance))) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. 
+ statement("uniform int SPIRV_Cross_BaseInstance;"); + statement("#endif"); + emitted = true; + emitted_base_instance = true; + } + else if (builtin == BuiltInBaseVertex) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseVertex;"); + statement("#endif"); + } + else if (builtin == BuiltInDrawIndex) + { + statement("#ifndef GL_ARB_shader_draw_parameters"); + // Cannot really be worked around. + statement("#error GL_ARB_shader_draw_parameters is not supported."); + statement("#endif"); + } + } + } + }); // Global variables. for (auto global : global_variables) @@ -4123,94 +4152,103 @@ void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) for (uint32_t j = 0; j < iteration_count; j++) { - entry_func.fixup_hooks_in.push_back([=, &var]() { - AccessChainMeta meta; - auto &c = this->get(var.initializer); - - uint32_t invocation_id = 0; - uint32_t member_index_id = 0; - if (is_control_point) - { - uint32_t ids = ir.increase_bound_by(3); - auto &uint_type = set(ids, OpTypeInt); - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - set(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true); - set(ids + 2, ids, i, false); - invocation_id = ids + 1; - member_index_id = ids + 2; - } - - if (is_patch) - { - statement("if (gl_InvocationID == 0)"); - begin_scope(); - } - - if (type_is_array && !is_control_point) - { - uint32_t indices[2] = { j, i }; - auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); - statement(chain, " = ", lut_name, "[", j, "];"); - } - else if (is_control_point) - { - uint32_t indices[2] = { invocation_id, member_index_id }; - auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); - statement(chain, " = ", lut_name, "[", 
builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); - } - else - { - auto chain = - access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); - statement(chain, " = ", to_expression(c.subconstants[i]), ";"); - } - - if (is_patch) - end_scope(); - }); + entry_func.fixup_hooks_in.push_back( + [=, &var]() + { + AccessChainMeta meta; + auto &c = this->get(var.initializer); + + uint32_t invocation_id = 0; + uint32_t member_index_id = 0; + if (is_control_point) + { + uint32_t ids = ir.increase_bound_by(3); + auto &uint_type = set(ids, OpTypeInt); + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, + true); + set(ids + 2, ids, i, false); + invocation_id = ids + 1; + member_index_id = ids + 2; + } + + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + + if (type_is_array && !is_control_point) + { + uint32_t indices[2] = { j, i }; + auto chain = + access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", lut_name, "[", j, "];"); + } + else if (is_control_point) + { + uint32_t indices[2] = { invocation_id, member_index_id }; + auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); + statement(chain, " = ", lut_name, "[", + builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); + } + else + { + auto chain = + access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", to_expression(c.subconstants[i]), ";"); + } + + if (is_patch) + end_scope(); + }); } } } else if (is_control_point) { auto lut_name = join("_", var.self, "_init"); - statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0), - " = ", to_expression(var.initializer), ";"); - entry_func.fixup_hooks_in.push_back([&, lut_name]() { - statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, 
"[gl_InvocationID];"); - }); + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0), " = ", + to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back( + [&, lut_name]() + { statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); }); } else if (has_decoration(var.self, DecorationBuiltIn) && BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask) { // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< - entry_func.fixup_hooks_in.push_back([&] { - auto &c = this->get(var.initializer); - uint32_t num_constants = uint32_t(c.subconstants.size()); - for (uint32_t i = 0; i < num_constants; i++) - { - // Don't use to_expression on constant since it might be uint, just fish out the raw int. - statement(to_expression(var.self), "[", i, "] = ", - convert_to_string(this->get(c.subconstants[i]).scalar_i32()), ";"); - } - }); + entry_func.fixup_hooks_in.push_back( + [&] + { + auto &c = this->get(var.initializer); + uint32_t num_constants = uint32_t(c.subconstants.size()); + for (uint32_t i = 0; i < num_constants; i++) + { + // Don't use to_expression on constant since it might be uint, just fish out the raw int. 
+ statement(to_expression(var.self), "[", i, + "] = ", convert_to_string(this->get(c.subconstants[i]).scalar_i32()), ";"); + } + }); } else { auto lut_name = join("_", var.self, "_init"); - statement("const ", type_to_glsl(type), " ", lut_name, - type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";"); - entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { - if (is_patch) - { - statement("if (gl_InvocationID == 0)"); - begin_scope(); - } - statement(to_expression(var.self), " = ", lut_name, ";"); - if (is_patch) - end_scope(); - }); + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, var.self), " = ", + to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back( + [&, lut_name, is_patch]() + { + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + statement(to_expression(var.self), " = ", lut_name, ";"); + if (is_patch) + end_scope(); + }); } } @@ -4781,8 +4819,7 @@ void CompilerGLSL::emit_extension_workarounds(ExecutionModel model) statement(""); } - auto arithmetic_feature_helper = - [&](Supp::Feature feat, std::string func_name, Op op, GroupOperation group_op) + auto arithmetic_feature_helper = [&](Supp::Feature feat, std::string func_name, Op op, GroupOperation group_op) { if (shader_subgroup_supporter.is_feature_requested(feat)) { @@ -4843,8 +4880,10 @@ void CompilerGLSL::emit_extension_workarounds(ExecutionModel model) { // Need both variants. // GLSL cannot overload on precision, so need to dispatch appropriately. 
- statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }"); - statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }"); + statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), + " wrap) { return wrap; }"); + statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), + " wrap) { return wrap; }"); } else { @@ -4904,8 +4943,8 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)"); begin_scope(); statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], " - "m[1][2] * m[2][0] - m[1][0] * m[2][2], " - "m[1][0] * m[2][1] - m[1][1] * m[2][0]));"); + "m[1][2] * m[2][0] - m[1][0] * m[2][2], " + "m[1][0] * m[2][1] - m[1][1] * m[2][0]));"); end_scope(); statement(""); } @@ -4915,10 +4954,14 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)"); begin_scope(); statement("return dot(m[0], vec4(" - "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " - "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " - "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " - "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));"); + "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + 
m[3][1] * m[1][2] * m[2][3] - m[1][1] * " + "m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " + "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * " + "m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " + "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * " + "m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " + "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * " + "m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));"); end_scope(); statement(""); } @@ -4937,17 +4980,18 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) { statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)"); begin_scope(); - statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);"); + statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], " + "m[1][0] * m[2][1] - m[1][1] * m[2][0]);"); statement("return mat3(t[0], " - "m[0][2] * m[2][1] - m[0][1] * m[2][2], " - "m[0][1] * m[1][2] - m[0][2] * m[1][1], " - "t[1], " - "m[0][0] * m[2][2] - m[0][2] * m[2][0], " - "m[0][2] * m[1][0] - m[0][0] * m[1][2], " - "t[2], " - "m[0][1] * m[2][0] - m[0][0] * m[2][1], " - "m[0][0] * m[1][1] - m[0][1] * m[1][0]) " - "* (1.0 / dot(m[0], t));"); + "m[0][2] * m[2][1] - m[0][1] * m[2][2], " + "m[0][1] * m[1][2] - m[0][2] * m[1][1], " + "t[1], " + "m[0][0] * m[2][2] - m[0][2] * m[2][0], " + "m[0][2] * m[1][0] - m[0][0] * m[1][2], " + "t[2], " + "m[0][1] * m[2][0] - m[0][0] * m[2][1], " + "m[0][0] * m[1][1] - m[0][1] * m[1][0]) " + "* (1.0 / dot(m[0], t));"); end_scope(); statement(""); } @@ -4957,27 +5001,43 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) 
statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)"); begin_scope(); statement(qual, "vec4 t = vec4(" - "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " - "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " - "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " - "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);"); + "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - " + "m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " + "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + " + "m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " + "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - " + "m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " + "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + " + "m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);"); statement("return mat4(" "t[0], " - "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], " - "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] 
* m[3][3], " - "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], " + "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * " + "m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], " + "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * " + "m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], " + "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * " + "m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], " "t[1], " - "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], " - "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], " - "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], " + "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * " + "m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], " + "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * " + "m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], " + "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * " + "m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], " "t[2], " - "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + 
m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], " - "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], " - "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], " + "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * " + "m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], " + "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * " + "m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], " + "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * " + "m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], " "t[3], " - "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], " - "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], " - "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) " + "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * " + "m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], " + "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * " + "m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], " + "m[1][0] * m[2][1] * m[0][2] - 
m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * " + "m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) " "* (1.0 / dot(m[0], t));"); end_scope(); statement(""); @@ -5004,8 +5064,8 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) const char *types[3][4] = { { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, - { "float", "vec2", "vec3", "vec4" }, - { "double", "dvec2", "dvec3", "dvec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, }; for (uint32_t k = 0; k < 4; k++) @@ -5014,13 +5074,13 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) if (i < 2) { - statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", - type, " ", spv_ops[i], "(", type, ", ", type, ");"); + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", type, " ", + spv_ops[i], "(", type, ", ", type, ");"); } else { - statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", - type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");"); + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", type, " ", + spv_ops[i], "(", type, ", ", type, ", ", type, ");"); } has_poly = true; @@ -5053,8 +5113,8 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) const char *types[3][4] = { { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, - { "float", "vec2", "vec3", "vec4" }, - { "double", "dvec2", "dvec3", "dvec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, }; for (uint32_t k = 0; k < 4; k++) @@ -5063,8 +5123,8 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) if (i < 2) { - statement("mediump ", type, " ", spv_ops[i], "Relaxed(", - "mediump ", type, " a, mediump ", type, " b)"); + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", "mediump ", type, " a, mediump ", type, + " b)"); begin_scope(); 
statement("mediump ", type, " res = ", spv_ops[i], "(a, b);"); statement("return res;"); @@ -5073,8 +5133,8 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) } else { - statement("mediump ", type, " ", spv_ops[i], "Relaxed(", - "mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)"); + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", "mediump ", type, " a, mediump ", type, + " b, mediump ", type, " c)"); begin_scope(); statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);"); statement("return res;"); @@ -5115,7 +5175,8 @@ void CompilerGLSL::force_temporary_and_recompile(uint32_t id) force_recompile(); } -uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) +uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, + Options::Precision precision) { // Constants do not have innate precision. auto handle_type = ir.ids[id].get_type(); @@ -5428,14 +5489,13 @@ string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_ty return constant_expression(tmp); } -string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type, - const string &base_expr, const SPIRType &type) +string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type, const string &base_expr, + const SPIRType &type) { - bool remapped_boolean = parent_type.basetype == SPIRType::Struct && - type.basetype == SPIRType::Boolean && + bool remapped_boolean = parent_type.basetype == SPIRType::Struct && type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean; - SPIRType tmp_type { OpNop }; + SPIRType tmp_type{ OpNop }; if (remapped_boolean) { tmp_type = get(type.parent_type); @@ -5478,14 +5538,13 @@ string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_ auto &type = expression_type(id); bool reroll_array = false; - bool remapped_boolean = 
parent_type.basetype == SPIRType::Struct && - type.basetype == SPIRType::Boolean && + bool remapped_boolean = parent_type.basetype == SPIRType::Struct && type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean; if (is_array(type)) { - reroll_array = !backend.array_is_value_type || - (block_like_type && !backend.array_is_value_type_in_buffer_blocks); + reroll_array = + !backend.array_is_value_type || (block_like_type && !backend.array_is_value_type_in_buffer_blocks); if (remapped_boolean) { @@ -5583,8 +5642,8 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); - return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, - is_packed, relaxed); + return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, is_packed, + relaxed); } else if (flattened_structs.count(id)) { @@ -5619,7 +5678,8 @@ string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) int wg_index = get_constant_mapping_to_workgroup_component(c); if (wg_index >= 0) { - auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); + auto wg_size = + join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); if (type.basetype != SPIRType::UInt) wg_size = bitcast_expression(type, SPIRType::UInt, wg_size); return wg_size; @@ -5841,7 +5901,7 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) uint32_t op0 = cop.arguments[0]; uint32_t op1 = cop.arguments[1]; return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + 
to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); } case OpSelect: @@ -5910,8 +5970,7 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) string expr; if (c && cop.arguments.size() == 2 && c->is_used_as_array_length && - !backend.supports_spec_constant_array_size && - is_vector(get(c->constant_type))) + !backend.supports_spec_constant_array_size && is_vector(get(c->constant_type))) { expr = to_expression(c->specialization_constant_id(0, cop.arguments[1])); } @@ -6052,8 +6111,7 @@ string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) } } -string CompilerGLSL::constant_expression(const SPIRConstant &c, - bool inside_block_like_struct_scope, +string CompilerGLSL::constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope, bool inside_struct_scope) { auto &type = get(c.constant_type); @@ -6101,9 +6159,8 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, // Should look at ArrayStride here as well, but it's possible to declare a constant struct // with Offset = 0, using no ArrayStride on the enclosed array type. // A particular CTS test hits this scenario. 
- bool array_type_decays = inside_block_like_struct_scope && - is_array(type) && - !backend.array_is_value_type_in_buffer_blocks; + bool array_type_decays = + inside_block_like_struct_scope && is_array(type) && !backend.array_is_value_type_in_buffer_blocks; // Allow Metal to use the array template to make arrays a value type bool needs_trailing_tracket = false; @@ -6116,10 +6173,9 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, is_array(type) && !array_type_decays) { const auto *p_type = &type; - SPIRType tmp_type { OpNop }; + SPIRType tmp_type{ OpNop }; - if (inside_struct_scope && - backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { tmp_type = type; @@ -6208,8 +6264,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, { auto res = constant_expression_vector(c, 0); - if (inside_struct_scope && - backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { SPIRType tmp_type = type; @@ -6234,8 +6289,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, } res += ")"; - if (inside_struct_scope && - backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + if (inside_struct_scope && backend.boolean_in_struct_remapped_type != SPIRType::Boolean && type.basetype == SPIRType::Boolean) { SPIRType tmp_type = type; @@ -6262,7 +6316,7 @@ string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t // There is no infinity in e4m3. 
if (std::isnan(float_value)) { - SPIRType type { OpTypeFloat }; + SPIRType type{ OpTypeFloat }; type.basetype = SPIRType::Half; type.vecsize = 1; type.columns = 1; @@ -6270,7 +6324,7 @@ string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t } else { - SPIRType type { OpTypeFloat }; + SPIRType type{ OpTypeFloat }; type.basetype = SPIRType::FloatE4M3; type.vecsize = 1; type.columns = 1; @@ -6290,7 +6344,7 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, // of complicated workarounds, just value-cast to the half type always. if (std::isnan(float_value) || std::isinf(float_value)) { - SPIRType type { OpTypeFloat }; + SPIRType type{ OpTypeFloat }; type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; @@ -6306,7 +6360,7 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, } else { - SPIRType type { OpTypeFloat }; + SPIRType type{ OpTypeFloat }; type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; @@ -6328,8 +6382,8 @@ string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col // Use special representation. if (!is_legacy()) { - SPIRType out_type { OpTypeFloat }; - SPIRType in_type { OpTypeInt }; + SPIRType out_type{ OpTypeFloat }; + SPIRType in_type{ OpTypeInt }; out_type.basetype = SPIRType::Float; in_type.basetype = SPIRType::UInt; out_type.vecsize = 1; @@ -6401,8 +6455,8 @@ std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32 // Use special representation. 
if (!is_legacy()) { - SPIRType out_type { OpTypeFloat }; - SPIRType in_type { OpTypeInt }; + SPIRType out_type{ OpTypeFloat }; + SPIRType in_type{ OpTypeInt }; out_type.basetype = SPIRType::Double; in_type.basetype = SPIRType::UInt64; out_type.vecsize = 1; @@ -6910,9 +6964,8 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + [result_type, result_id](const pair &tmp) + { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); hoisted_temporaries.insert(result_id); @@ -6931,7 +6984,8 @@ void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t r if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(result_type)); - statement(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), initializer, ";"); + statement(flags_to_qualifiers_glsl(type, result_id, flags), variable_decl(type, to_name(result_id)), + initializer, ";"); } } @@ -6955,9 +7009,8 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), - [result_type, result_id](const pair &tmp) { - return tmp.first == result_type && tmp.second == result_id; - }) == end(header.declare_temporary)) + [result_type, result_id](const pair &tmp) + { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); 
hoisted_temporaries.insert(result_id); @@ -7069,23 +7122,21 @@ void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, { auto &type = get(result_type); bool forward = should_forward(op0); - emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward); + emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), + forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block) { - statement("EmitMeshTasksEXT(", - to_unpacked_expression(block.mesh.groups[0]), ", ", - to_unpacked_expression(block.mesh.groups[1]), ", ", - to_unpacked_expression(block.mesh.groups[2]), ");"); + statement("EmitMeshTasksEXT(", to_unpacked_expression(block.mesh.groups[0]), ", ", + to_unpacked_expression(block.mesh.groups[1]), ", ", to_unpacked_expression(block.mesh.groups[2]), ");"); } void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { // Various FP arithmetic opcodes such as add, sub, mul will hit this. - bool force_temporary_precise = backend.support_precise_qualifier && - has_legacy_nocontract(result_type, result_id) && + bool force_temporary_precise = backend.support_precise_qualifier && has_legacy_nocontract(result_type, result_id) && type_is_floating_point(get(result_type)); bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; @@ -7180,7 +7231,7 @@ SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_o // Create a fake type so we can bitcast to it. // We only deal with regular arithmetic types here like int, uints and so on. 
- SPIRType expected_type{type0.op}; + SPIRType expected_type{ type0.op }; expected_type.basetype = input_type; expected_type.vecsize = type0.vecsize; expected_type.columns = type0.columns; @@ -7224,8 +7275,7 @@ bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint3 } void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, - const char *op, SPIRType::BaseType input_type, - bool skip_cast_if_equal_type, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type, bool implicit_integer_promotion) { string cast_op0, cast_op1; @@ -7296,19 +7346,18 @@ void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, forced_temporaries.insert(result_id); emit_op(result_type, result_id, - join(op, "(", to_atomic_ptr_expression(op0), ", ", - to_unpacked_expression(op1), ")"), false); + join(op, "(", to_atomic_ptr_expression(op0), ", ", to_unpacked_expression(op1), ")"), false); flush_all_atomic_capable_variables(); } -void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, - uint32_t op0, uint32_t op1, uint32_t op2, - const char *op) +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) { forced_temporaries.insert(result_id); emit_op(result_type, result_id, - join(op, "(", to_non_uniform_aware_expression(op0), ", ", - to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); + join(op, "(", to_non_uniform_aware_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + false); flush_all_atomic_capable_variables(); } @@ -7538,7 +7587,7 @@ void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result auto op3_expr = to_unpacked_expression(op3); assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int); - SPIRType target_type { OpTypeInt }; + SPIRType 
target_type{ OpTypeInt }; target_type.width = 32; target_type.vecsize = 1; target_type.basetype = offset_count_type; @@ -7667,7 +7716,9 @@ string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtyp return join(type_prefix, type, "LodOffset"); else if (op == "textureProjGrad") return join(type_prefix, type, - is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); + is_legacy_es() ? "ProjGradEXT" : + is_legacy_desktop() ? "ProjGradARB" : + "ProjGrad"); else if (op == "textureProjLodOffset") return join(type_prefix, type, "ProjLodOffset"); else if (op == "textureSize") @@ -7869,10 +7920,12 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args))); auto &combined = current_function->combined_parameters; - auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { - return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && - p.sampler_id == sid; - }); + auto itr = find_if(begin(combined), end(combined), + [=](const SPIRFunction::CombinedImageSamplerParameter &p) + { + return p.global_image == global_image && p.global_sampler == global_sampler && + p.image_id == iid && p.sampler_id == sid; + }); if (itr != end(combined)) return to_expression(itr->id) + array_expr; @@ -7887,9 +7940,8 @@ string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID s { // For global sampler2D, look directly at the global remapping table. 
auto &mapping = combined_image_samplers; - auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { - return combined.image_id == image_id && combined.sampler_id == samp_id; - }); + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) + { return combined.image_id == image_id && combined.sampler_id == samp_id; }); if (itr != end(combined_image_samplers)) return to_expression(itr->combined_id) + array_expr; @@ -8208,7 +8260,8 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool length--; } - auto test = [&](uint32_t &v, uint32_t flag) { + auto test = [&](uint32_t &v, uint32_t flag) + { if (length && (flags & flag)) { v = *opt++; @@ -8470,7 +8523,8 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool } bool swizz_func = backend.swizzle_is_function; - auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * + { if (comps == in_comps) return ""; @@ -8608,9 +8662,8 @@ string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool { if (imgtype.image.arrayed) { - coord_expr = join("ivec3(", enclose_expression(coord_expr), - ".x, 0, ", - enclose_expression(coord_expr), ".y)"); + coord_expr = join("ivec3(", enclose_expression(coord_expr), ".x, 0, ", + enclose_expression(coord_expr), ".y)"); } else coord_expr = join("ivec2(", coord_expr, ", 0)"); @@ -8878,9 +8931,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto &op1_type = expression_type(args[1]); auto via_type = op1_type; via_type.basetype = SPIRType::Int; - statement(to_expression(args[1]), " = ", - type_to_glsl(op1_type), "(", type_to_glsl(via_type), - "(", to_expression(args[0]), "));"); + statement(to_expression(args[1]), " = ", type_to_glsl(op1_type), "(", type_to_glsl(via_type), "(", + 
to_expression(args[0]), "));"); emit_binary_op(result_type, id, args[0], args[1], "-"); } break; @@ -8900,8 +8952,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, auto &op0_type = expression_type(args[0]); auto via_type = op0_type; via_type.basetype = SPIRType::Int; - statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type), - "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));"); + statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type), "(", + type_to_glsl(via_type), "(", to_expression(args[0]), "));"); statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ", to_expression(id), ".", to_member_name(type, 1), ";"); } @@ -9012,8 +9064,10 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, inherit_expression_dependencies(epos_id, args[0]); inherit_expression_dependencies(eneg_id, args[0]); - auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / " - "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")"); + auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), + ") / " + "(", + to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")"); emit_op(result_type, id, expr, true); inherit_expression_dependencies(id, epos_id); inherit_expression_dependencies(id, eneg_id); @@ -9084,8 +9138,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, SPIRV_CROSS_THROW("Unsupported type for matrix determinant"); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); - require_polyfill(static_cast(PolyfillDeterminant2x2 << (type.vecsize - 2)), - relaxed); + require_polyfill(static_cast(PolyfillDeterminant2x2 << (type.vecsize - 2)), relaxed); emit_unary_func_op(result_type, id, args[0], (options.es && relaxed) ? 
"spvDeterminantMP" : "spvDeterminant"); } @@ -9118,8 +9171,7 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, SPIRV_CROSS_THROW("Unsupported type for matrix inverse"); bool relaxed = has_decoration(id, DecorationRelaxedPrecision); - require_polyfill(static_cast(PolyfillMatrixInverse2x2 << (type.vecsize - 2)), - relaxed); + require_polyfill(static_cast(PolyfillMatrixInverse2x2 << (type.vecsize - 2)), relaxed); func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse"; } @@ -9353,7 +9405,8 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, if (relaxed) require_polyfill(poly, false); - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp"); + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], + relaxed ? "spvNClampRelaxed" : "spvNClamp"); } else { @@ -9430,20 +9483,21 @@ void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, ui switch (op) { case GLSLstd450Asinh: - expr = join("log(", to_enclosed_expression(op0), " + sqrt(", - to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))"); + expr = join("log(", to_enclosed_expression(op0), " + sqrt(", to_enclosed_expression(op0), " * ", + to_enclosed_expression(op0), " + ", one, "))"); emit_op(result_type, id, expr, forward); break; case GLSLstd450Acosh: - expr = join("log(", to_enclosed_expression(op0), " + sqrt(", - to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))"); + expr = join("log(", to_enclosed_expression(op0), " + sqrt(", to_enclosed_expression(op0), " * ", + to_enclosed_expression(op0), " - ", one, "))"); break; case GLSLstd450Atanh: - expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / " - "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5", - backend.float_literal_suffix ? 
"f" : ""); + expr = join("log((", one, " + ", to_enclosed_expression(op0), + ") / " + "(", + one, " - ", to_enclosed_expression(op0), ")) * 0.5", backend.float_literal_suffix ? "f" : ""); break; default: @@ -9733,7 +9787,7 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) GLSL_GROUP_OP(FMul) #undef GLSL_GROUP_OP - // clang-format on + // clang-format on case OpGroupNonUniformFMin: case OpGroupNonUniformFMax: @@ -10532,7 +10586,7 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index) case 3: return "w"; default: - return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec. + return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec. } } @@ -10544,7 +10598,8 @@ void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_ bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; - string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read); + string idx_expr = + index_is_literal ? 
convert_to_string(index) : to_unpacked_expression(index, register_expression_read); // For the case where the base of an OpPtrAccessChain already ends in [n], // we need to use the index as an offset to the existing index, otherwise, @@ -10636,7 +10691,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden bool hide_first_subscript = count > 1 && is_user_type_structured(base); - const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { + const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) + { AccessChainFlags mod_flags = flags; if (!is_literal) mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; @@ -10731,8 +10787,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (flags & ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT) { is_packed = true; - expr = join("*reinterpret_cast(", intptr_expr, ")"); + expr = join("*reinterpret_cast(", intptr_expr, + ")"); } else { @@ -10816,7 +10872,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice case BuiltInCullPrimitiveEXT: case BuiltInPrimitiveShadingRateKHR: if (mesh_shader) - expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + expr = + join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); else append_index(index, is_literal); break; @@ -10826,8 +10883,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice break; } } - else if (backend.force_merged_mesh_block && i == 0 && var && - !is_builtin_variable(*var) && var->storage == StorageClassOutput) + else if (backend.force_merged_mesh_block && i == 0 && var && !is_builtin_variable(*var) && + var->storage == StorageClassOutput) { if (is_per_primitive_variable(*var)) expr = join("gl_MeshPrimitivesEXT[", 
to_expression(index, register_expression_read), "].", expr); @@ -10858,7 +10915,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (!pending_array_enclose) expr += "]"; } - else if (index_is_literal || !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) + else if (index_is_literal || + !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) { // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. @@ -10977,6 +11035,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // is used to store a column. We can resolve it right here and now if we access a scalar directly, // by flipping indexing order of the matrix. + if (!backend.matrix_column_accessor.empty()) + expr += "." + backend.matrix_column_accessor; expr += "["; if (is_literal) expr += convert_to_string(index); @@ -11016,8 +11076,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong, // and needs to be [1].data[0] instead. 
end_deferred_index++; - deferred_index = deferred_index.substr(end_deferred_index) + - deferred_index.substr(0, end_deferred_index); + deferred_index = + deferred_index.substr(end_deferred_index) + deferred_index.substr(0, end_deferred_index); } expr.resize(column_index); @@ -11742,9 +11802,8 @@ bool CompilerGLSL::should_forward(uint32_t id) const if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) return false; - if (expr && expr->loaded_from - && has_decoration(expr->loaded_from, DecorationBuiltIn) - && has_decoration(expr->loaded_from, DecorationVolatile)) + if (expr && expr->loaded_from && has_decoration(expr->loaded_from, DecorationBuiltIn) && + has_decoration(expr->loaded_from, DecorationVolatile)) { // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. return false; @@ -11873,7 +11932,8 @@ void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) { auto &type = get(var.basetype); auto &flags = get_decoration_bitset(var.self); - statement(flags_to_qualifiers_glsl(type, var.self, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + statement(flags_to_qualifiers_glsl(type, var.self, flags), variable_decl(type, join("_", var.self, "_copy")), + ";"); flushed_phi_variables.insert(var.self); } } @@ -12605,9 +12665,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t length = instruction.length; #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) -#define GLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ - opcode_is_sign_invariant(opcode), implicit_integer_promotion) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), \ + implicit_integer_promotion) #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) #define GLSL_QFOP(op) 
emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) @@ -12791,9 +12851,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct) requires_temporary = !backend.can_declare_struct_inline; - auto &expr = requires_temporary ? - emit_op(ops[0], ops[1], std::move(e), false) : - set(ops[1], std::move(e), ops[0], should_forward(ops[2])); + auto &expr = requires_temporary ? emit_op(ops[0], ops[1], std::move(e), false) : + set(ops[1], std::move(e), ops[0], should_forward(ops[2])); auto *backing_variable = maybe_get_backing_variable(ops[2]); expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); @@ -13163,7 +13222,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length); e = &emit_op(result_type, id, expr, true, true); } - else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && + length == 1) { // Only apply this optimization if result is scalar. @@ -13181,7 +13241,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // from expression causing it to be forced to an actual temporary in GLSL. 
auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | - ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + ACCESS_CHAIN_FORCE_COMPOSITE_BIT, + &meta); e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); e->base_expression = ops[2]; @@ -13191,8 +13252,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } else { - auto expr = access_chain_internal(ops[2], &ops[3], length, - ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + auto expr = access_chain_internal( + ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); inherit_expression_dependencies(id, ops[2]); } @@ -13236,8 +13297,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // that loop variable, since we won't be able to override the expression after the fact. // If the composite is hoisted, we might never be able to properly invalidate any usage // of that composite in a subsequent loop iteration. 
- if (invalid_expressions.count(composite) || - block_composite_insert_overwrite.count(composite) || + if (invalid_expressions.count(composite) || block_composite_insert_overwrite.count(composite) || hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) || maybe_get(composite) == nullptr) { @@ -13245,7 +13305,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } else if (backend.requires_relaxed_precision_analysis && has_decoration(composite, DecorationRelaxedPrecision) != - has_decoration(id, DecorationRelaxedPrecision) && + has_decoration(id, DecorationRelaxedPrecision) && get(result_type).basetype != SPIRType::Struct) { // Similarly, if precision does not match for input and output, @@ -13863,9 +13923,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &op0_type = expression_type(op0); auto via_type = op0_type; via_type.basetype = SPIRType::Int; - expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", - type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", - to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))"); + expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", type_to_glsl(op0_type), + "(", type_to_glsl(via_type), "(", to_enclosed_expression(op0), " / ", + to_enclosed_expression(op1), "))"); } emit_op(result_type, result_id, expr, forward); @@ -13919,7 +13979,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto &type = get(ops[0]); if (type.vecsize > 1) - GLSL_UFOP(not ); + GLSL_UFOP(not); else GLSL_UOP(!); break; @@ -14337,9 +14397,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) (atomic_image && get(type.image.type).basetype == SPIRType::UInt); const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; const char *increment = unsigned_type ? 
"0u" : "0"; - emit_op(ops[0], ops[1], - join(op, "(", - to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); + emit_op(ops[0], ops[1], join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); flush_all_atomic_capable_variables(); if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) @@ -14394,8 +14452,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else increment = "-1"; - emit_op(ops[0], ops[1], - join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); + emit_op(ops[0], ops[1], join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) require_extension_internal("GL_EXT_shader_atomic_int64"); @@ -14582,9 +14639,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } bool forward = should_forward(ops[3]); - emit_op(ops[0], ops[1], - join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), - forward); + emit_op(ops[0], ops[1], join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), forward); inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); register_control_dependent_expression(ops[1]); @@ -14737,7 +14792,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", + to_expression(samples), ")"); } else imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); @@ -14758,7 +14814,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) else { // Implement subpass loads via texture barrier style sampling. 
- imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + imgexpr = + join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } imgexpr = remap_swizzle(get(result_type), 4, imgexpr); @@ -14793,13 +14850,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", - coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", + to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), + ", ", to_expression(sparse_texel_id), ");"); } else { - statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", - coord_expr, ", ", to_expression(sparse_texel_id), ");"); + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", + to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", + to_expression(sparse_texel_id), ");"); } imgexpr = join(type_to_glsl(get(result_type)), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), ")"); @@ -14814,8 +14873,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) "operand mask was used."); uint32_t samples = ops[5]; - imgexpr = - join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", + to_expression(samples), ")"); } else imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); @@ -14904,7 +14963,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (operands != ImageOperandsSampleMask || length != 5) 
SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; - statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", + to_expression(samples), ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else @@ -15231,8 +15291,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { uint32_t extension_set = ops[2]; auto ext = get(extension_set).ext; - if (ext != SPIRExtension::SPV_debug_info && - ext != SPIRExtension::NonSemanticShaderDebugInfo && + if (ext != SPIRExtension::SPV_debug_info && ext != SPIRExtension::NonSemanticShaderDebugInfo && ext != SPIRExtension::NonSemanticGeneric) { SPIRV_CROSS_THROW("Unexpected use of ExtInstWithForwardRefsKHR."); @@ -15270,8 +15329,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { emit_non_semantic_shader_debug_info(ops[0], ops[1], ops[3], &ops[4], length - 4); } - else if (ext == SPIRExtension::SPV_debug_info || - ext == SPIRExtension::NonSemanticGeneric) + else if (ext == SPIRExtension::SPV_debug_info || ext == SPIRExtension::NonSemanticGeneric) { break; // Ignore SPIR-V debug information extended instructions. 
} @@ -15594,19 +15652,20 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) flush_control_dependent_expressions(current_emitting_block->self); break; case OpTraceNV: - statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", - to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", - to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", + to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", + to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); flush_control_dependent_expressions(current_emitting_block->self); break; case OpTraceRayKHR: if (!has_decoration(ops[10], DecorationLocation)) SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); - statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", - to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", - to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); + statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", + to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", + to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), + ");"); 
flush_control_dependent_expressions(current_emitting_block->self); break; case OpExecuteCallableNV: @@ -15623,11 +15682,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. case OpRayQueryInitializeKHR: flush_variable_declaration(ops[0]); - statement("rayQueryInitializeEXT(", - to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", - to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", - to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); + statement("rayQueryInitializeEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", + to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); break; case OpRayQueryProceedKHR: flush_variable_declaration(ops[0]); @@ -15648,41 +15705,47 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: flush_variable_declaration(ops[1]); emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement("rayQueryGetIntersectionTriangleVertexPositionsEXT(", to_expression(ops[2]), ", bool(", to_expression(ops[3]), "), ", to_expression(ops[1]), ");"); + statement("rayQueryGetIntersectionTriangleVertexPositionsEXT(", to_expression(ops[2]), ", bool(", + to_expression(ops[3]), "), ", to_expression(ops[1]), ");"); break; -#define GLSL_RAY_QUERY_GET_OP(op) \ - case OpRayQueryGet##op##KHR: \ - flush_variable_declaration(ops[2]); \ +#define GLSL_RAY_QUERY_GET_OP(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ break -#define GLSL_RAY_QUERY_GET_OP2(op) \ - case OpRayQueryGet##op##KHR: \ - flush_variable_declaration(ops[2]); \ - 
emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ +#define GLSL_RAY_QUERY_GET_OP2(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], \ + join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), \ + false); \ break - GLSL_RAY_QUERY_GET_OP(RayTMin); - GLSL_RAY_QUERY_GET_OP(RayFlags); - GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); - GLSL_RAY_QUERY_GET_OP(WorldRayDirection); - GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); - GLSL_RAY_QUERY_GET_OP2(IntersectionType); - GLSL_RAY_QUERY_GET_OP2(IntersectionT); - GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); - GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); - GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); - GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); - GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); - GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); - GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); - GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); - GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); - GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); - GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); + GLSL_RAY_QUERY_GET_OP(RayTMin); + GLSL_RAY_QUERY_GET_OP(RayFlags); + GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); + GLSL_RAY_QUERY_GET_OP(WorldRayDirection); + GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); + GLSL_RAY_QUERY_GET_OP2(IntersectionType); + GLSL_RAY_QUERY_GET_OP2(IntersectionT); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); + GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); + GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); + 
GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); + GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); #undef GLSL_RAY_QUERY_GET_OP #undef GLSL_RAY_QUERY_GET_OP2 case OpRayQueryGetClusterIdNV: flush_variable_declaration(ops[2]); - emit_op(ops[0], ops[1], join("rayQueryGetIntersectionClusterIdNV(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); + emit_op(ops[0], ops[1], + join("rayQueryGetIntersectionClusterIdNV(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), + "))"), + false); break; case OpTensorQuerySizeARM: flush_variable_declaration(ops[1]); @@ -15694,7 +15757,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) flush_variable_declaration(ops[1]); emit_uninitialized_temporary_expression(ops[0], ops[1]); - SmallVector args { + SmallVector args{ to_expression(ops[2]), // tensor to_expression(ops[3]), // coordinates to_expression(ops[1]), // out value @@ -15731,7 +15794,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { flush_variable_declaration(ops[0]); - SmallVector args { + SmallVector args{ to_expression(ops[0]), // tensor to_expression(ops[1]), // coordinates to_expression(ops[2]), // out value @@ -15759,8 +15822,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { require_extension_internal("GL_EXT_ray_tracing"); - bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && - !hoisted_temporaries.count(ops[1]); + bool elide_temporary = + should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && !hoisted_temporaries.count(ops[1]); if (elide_temporary) { @@ -15930,14 +15993,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto matrix_layout_id = ops[4]; auto matrix_iterpretation_id = ops[5]; auto matrix_stride_id = length >= 6 ? 
ops[6] : 0; - statement(join("coopVecOuterProductAccumulateNV(", to_expression(v1), ", ", to_expression(v2), ", ", - to_expression(buf), ", ", to_expression(offset), ", ", - matrix_stride_id ? to_expression(matrix_stride_id) : "0", - ", ", to_pretty_expression_if_int_constant( - matrix_layout_id, std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)), - ", ", to_pretty_expression_if_int_constant( - matrix_iterpretation_id, std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)), - ");")); + statement(join( + "coopVecOuterProductAccumulateNV(", to_expression(v1), ", ", to_expression(v2), ", ", to_expression(buf), + ", ", to_expression(offset), ", ", matrix_stride_id ? to_expression(matrix_stride_id) : "0", ", ", + to_pretty_expression_if_int_constant(matrix_layout_id, std::begin(CoopVecMatrixLayoutNames), + std::end(CoopVecMatrixLayoutNames)), + ", ", + to_pretty_expression_if_int_constant(matrix_iterpretation_id, std::begin(CoopVecComponentTypeNames), + std::end(CoopVecComponentTypeNames)), + ");")); register_write(ops[0]); break; } @@ -15978,14 +16042,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // arguments 3, 6 and in case of MulAddNv also 9 use component type int constants if (i == 3 || i == 6 || (i == 9 && opcode == OpCooperativeVectorMatrixMulAddNV)) { - stmt += to_pretty_expression_if_int_constant( - ops[i], std::begin(CoopVecComponentTypeNames), std::end(CoopVecComponentTypeNames)); + stmt += to_pretty_expression_if_int_constant(ops[i], std::begin(CoopVecComponentTypeNames), + std::end(CoopVecComponentTypeNames)); } else if ((i == 12 && opcode == OpCooperativeVectorMatrixMulAddNV) || (i == 9 && opcode == OpCooperativeVectorMatrixMulNV)) { - stmt += to_pretty_expression_if_int_constant( - ops[i], std::begin(CoopVecMatrixLayoutNames), std::end(CoopVecMatrixLayoutNames)); + stmt += to_pretty_expression_if_int_constant(ops[i], std::begin(CoopVecMatrixLayoutNames), + 
std::end(CoopVecMatrixLayoutNames)); } else stmt += to_expression(ops[i]); @@ -16004,9 +16068,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t result_type = ops[0]; uint32_t id = ops[1]; set( - id, join(type_to_glsl(get(result_type)), - "(", type_to_glsl(get(ops[2])), "(0).length())"), - result_type, true); + id, + join(type_to_glsl(get(result_type)), "(", type_to_glsl(get(ops[2])), "(0).length())"), + result_type, true); break; } @@ -16025,8 +16089,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (!is_forcing_recompilation()) split_expr = split_coopmat_pointer(expr); - string layout_expr = to_pretty_expression_if_int_constant( - ops[3], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames)); + string layout_expr = to_pretty_expression_if_int_constant(ops[3], std::begin(CoopMatMatrixLayoutNames), + std::end(CoopMatMatrixLayoutNames)); statement("coopMatLoad(", to_expression(id), ", ", split_expr.first, ", ", split_expr.second, ", ", to_expression(ops[4]), ", ", layout_expr, ");"); @@ -16048,8 +16112,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (!is_forcing_recompilation()) split_expr = split_coopmat_pointer(expr); - string layout_expr = to_pretty_expression_if_int_constant( - ops[2], std::begin(CoopMatMatrixLayoutNames), std::end(CoopMatMatrixLayoutNames)); + string layout_expr = to_pretty_expression_if_int_constant(ops[2], std::begin(CoopMatMatrixLayoutNames), + std::end(CoopMatMatrixLayoutNames)); statement("coopMatStore(", to_expression(ops[1]), ", ", split_expr.first, ", ", split_expr.second, ", ", to_expression(ops[3]), ", ", layout_expr, ");"); @@ -16069,12 +16133,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) uint32_t C = ops[4]; bool forward = should_forward(A) && should_forward(B) && should_forward(C); emit_op(result_type, id, - join("coopMatMulAdd(", - to_unpacked_expression(A), ", ", - to_unpacked_expression(B), ", ", - 
to_unpacked_expression(C), ", ", - (length >= 6 ? ops[5] : 0), - ")"), + join("coopMatMulAdd(", to_unpacked_expression(A), ", ", to_unpacked_expression(B), ", ", + to_unpacked_expression(C), ", ", (length >= 6 ? ops[5] : 0), ")"), forward); inherit_expression_dependencies(id, A); @@ -16279,8 +16339,7 @@ string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &ex // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong, // and needs to be [1].data[0] instead. end_deferred_index++; - column_expr = column_expr.substr(end_deferred_index) + - column_expr.substr(0, end_deferred_index); + column_expr = column_expr.substr(end_deferred_index) + column_expr.substr(0, end_deferred_index); } auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; @@ -16345,7 +16404,8 @@ void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type if (is_block) qualifiers = to_interpolation_qualifiers(memberflags); - statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, 0, memberflags), + statement(layout_for_member(type, index), qualifiers, qualifier, + flags_to_qualifiers_glsl(membertype, 0, memberflags), variable_decl(membertype, to_member_name(type, index)), ";"); } @@ -16365,10 +16425,9 @@ string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, uint32_t id, } // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). 
- bool type_supports_precision = - type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || - type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || - type.basetype == SPIRType::Sampler; + bool type_supports_precision = type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || + type.basetype == SPIRType::UInt || type.basetype == SPIRType::Image || + type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Sampler; if (!type_supports_precision) return qual; @@ -16533,10 +16592,8 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) auto &type = expression_type(arg.id); const char *direction = ""; - if (is_pointer(type) && - (type.storage == StorageClassFunction || - type.storage == StorageClassPrivate || - type.storage == StorageClassOutput)) + if (is_pointer(type) && (type.storage == StorageClassFunction || type.storage == StorageClassPrivate || + type.storage == StorageClassOutput)) { // If we're passing around block types to function, we really mean reference in a pointer sense, // but DXC does not like inout for mesh blocks, so workaround that. out is technically not correct, @@ -16648,7 +16705,7 @@ string CompilerGLSL::pls_decl(const PlsRemap &var) auto op_and_basetype = pls_format_to_basetype(var.format); - SPIRType type { op_and_basetype.first }; + SPIRType type{ op_and_basetype.first }; type.basetype = op_and_basetype.second; auto vecsize = pls_format_to_components(var.format); if (vecsize > 1) @@ -16857,8 +16914,7 @@ string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*m } // "Shadow" state in GLSL only exists for samplers and combined image samplers. 
- if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && - is_depth_image(type, id)) + if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && is_depth_image(type, id)) { res += "Shadow"; @@ -16961,7 +17017,7 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) if (type.ext.tensor.shape != 0) SPIRV_CROSS_THROW("GLSL tensors cannot have a Shape."); return join("tensorARM<", type_to_glsl(get(type.ext.tensor.type)), ", ", - to_expression(type.ext.tensor.rank), ">"); + to_expression(type.ext.tensor.rank), ">"); case SPIRType::Void: return "void"; @@ -17050,8 +17106,7 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) if (scope_expr.empty()) scope_expr = to_expression(coop_type->ext.cooperative.scope_id); - return join("coopmat<", type_to_glsl(get(coop_type->parent_type)), ", ", - scope_expr, ", ", + return join("coopmat<", type_to_glsl(get(coop_type->parent_type)), ", ", scope_expr, ", ", to_expression(coop_type->ext.cooperative.rows_id), ", ", to_expression(coop_type->ext.cooperative.columns_id), ", ", use, ">"); } @@ -17643,10 +17698,12 @@ void CompilerGLSL::flush_phi(BlockID from, BlockID to) // as part of another Phi node in our target block. // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. 
- bool need_saved_temporary = - find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { - return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; - }) != end(child.phi_variables); + bool need_saved_temporary = find_if(itr + 1, end(child.phi_variables), + [&](const SPIRBlock::Phi &future_phi) -> bool + { + return future_phi.local_variable == ID(phi.function_variable) && + future_phi.parent == from; + }) != end(child.phi_variables); if (need_saved_temporary) { @@ -18053,8 +18110,8 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); - bool flushes_phi = flush_phi_required(block.self, block.true_block) || - flush_phi_required(block.self, block.false_block); + bool flushes_phi = + flush_phi_required(block.self, block.true_block) || flush_phi_required(block.self, block.false_block); // This can work! We only did trivial things which could be forwarded in block body! 
if (!flushes_phi && current_count == statement_count && condition_is_temporary) @@ -18136,8 +18193,8 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); - bool flushes_phi = flush_phi_required(child.self, child.true_block) || - flush_phi_required(child.self, child.false_block); + bool flushes_phi = + flush_phi_required(child.self, child.true_block) || flush_phi_required(child.self, child.false_block); if (!flushes_phi && current_count == statement_count && condition_is_temporary) { @@ -18236,7 +18293,8 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &tempo if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) initializer = join(" = ", to_zero_initialized_expression(tmp.first)); - statement(flags_to_qualifiers_glsl(type, tmp.second, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); + statement(flags_to_qualifiers_glsl(type, tmp.second, flags), variable_decl(type, to_name(tmp.second)), + initializer, ";"); hoisted_temporaries.insert(tmp.second); forced_temporaries.insert(tmp.second); @@ -18251,8 +18309,7 @@ void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &tempo { uint32_t mirror_id = mirrored_precision_itr->second; auto &mirror_flags = get_decoration_bitset(mirror_id); - statement(flags_to_qualifiers_glsl(type, mirror_id, mirror_flags), - variable_decl(type, to_name(mirror_id)), + statement(flags_to_qualifiers_glsl(type, mirror_id, mirror_flags), variable_decl(type, to_name(mirror_id)), initializer, ";"); // The temporary might be read from before it's assigned, set up the expression now. 
set(mirror_id, to_name(mirror_id), tmp.first, true); @@ -18302,9 +18359,8 @@ BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block) auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id); if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() && find_if(block.declare_temporary.begin(), block.declare_temporary.end(), - [mirrored_precision_itr](const std::pair &p) { - return p.second == mirrored_precision_itr->second; - }) == block.declare_temporary.end()) + [mirrored_precision_itr](const std::pair &p) + { return p.second == mirrored_precision_itr->second; }) == block.declare_temporary.end()) { block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second }); } @@ -18612,7 +18668,8 @@ BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block) }; const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector &labels, - const char *suffix) -> string { + const char *suffix) -> string + { string ret; size_t count = labels.size(); for (size_t i = 0; i < count; i++) @@ -18889,11 +18946,10 @@ BlockID CompilerGLSL::emit_block_chain_inner(SPIRBlock &block) while (id) { auto &iter_block = get(id); - if (iter_block.terminator == SPIRBlock::MultiSelect || - iter_block.merge == SPIRBlock::MergeLoop) + if (iter_block.terminator == SPIRBlock::MultiSelect || iter_block.merge == SPIRBlock::MergeLoop) { - ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? - iter_block.merge_block : iter_block.next_block; + ID next_block = + iter_block.merge == SPIRBlock::MergeLoop ? 
iter_block.merge_block : iter_block.next_block; bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block; if (!outside_construct) { @@ -19157,14 +19213,13 @@ bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t so else array_expr = to_expression(type.array.back()); - SPIRType target_type { OpTypeInt }; + SPIRType target_type{ OpTypeInt }; target_type.basetype = SPIRType::Int; statement("for (int i = 0; i < int(", array_expr, "); i++)"); begin_scope(); - statement(to_expression(target_id), "[i] = ", - bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), - ";"); + statement(to_expression(target_id), + "[i] = ", bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), ";"); end_scope(); return true; @@ -19189,9 +19244,7 @@ void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t s auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); bool is_builtin = is_builtin_variable(*var) && - (builtin == BuiltInPointSize || - builtin == BuiltInPosition || - builtin == BuiltInSampleMask); + (builtin == BuiltInPointSize || builtin == BuiltInPosition || builtin == BuiltInSampleMask); bool is_tess = is_tessellation_shader(); bool is_patch = has_decoration(var->self, DecorationPatch); bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; @@ -19222,7 +19275,7 @@ void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t s statement(new_expr, "[i] = gl_in[i].", expr, ";"); else if (is_sample_mask) { - SPIRType target_type { OpTypeInt }; + SPIRType target_type{ OpTypeInt }; target_type.basetype = SPIRType::Int; statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";"); } @@ -19350,8 +19403,7 @@ void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) if (!var) return; - if (var->storage != StorageClassUniformConstant 
&& - var->storage != StorageClassStorageBuffer && + if (var->storage != StorageClassUniformConstant && var->storage != StorageClassStorageBuffer && var->storage != StorageClassUniform) return; @@ -19483,45 +19535,50 @@ void CompilerGLSL::fixup_anonymous_struct_names() // Breaks exponential explosion with weird type trees. std::unordered_set visited; - ir.for_each_typed_id([&](uint32_t, SPIRType &type) { - if (type.basetype == SPIRType::Struct && - (has_decoration(type.self, DecorationBlock) || - has_decoration(type.self, DecorationBufferBlock))) - { - fixup_anonymous_struct_names(visited, type); - } - }); + ir.for_each_typed_id( + [&](uint32_t, SPIRType &type) + { + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) + { + fixup_anonymous_struct_names(visited, type); + } + }); } void CompilerGLSL::fixup_type_alias() { // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. - ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { - if (!type.type_alias) - return; - - if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) - { - // Top-level block types should never alias anything else. - type.type_alias = 0; - } - else if (type_is_block_like(type) && type.self == ID(self)) - { - // A block-like type is any type which contains Offset decoration, but not top-level blocks, - // i.e. blocks which are placed inside buffers. - // Become the master. 
- ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { - if (other_id == self) - return; - - if (other_type.type_alias == type.type_alias) - other_type.type_alias = self; - }); - - this->get(type.type_alias).type_alias = self; - type.type_alias = 0; - } - }); + ir.for_each_typed_id( + [&](uint32_t self, SPIRType &type) + { + if (!type.type_alias) + return; + + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + // Top-level block types should never alias anything else. + type.type_alias = 0; + } + else if (type_is_block_like(type) && type.self == ID(self)) + { + // A block-like type is any type which contains Offset decoration, but not top-level blocks, + // i.e. blocks which are placed inside buffers. + // Become the master. + ir.for_each_typed_id( + [&](uint32_t other_id, SPIRType &other_type) + { + if (other_id == self) + return; + + if (other_type.type_alias == type.type_alias) + other_type.type_alias = self; + }); + + this->get(type.type_alias).type_alias = self; + type.type_alias = 0; + } + }); } void CompilerGLSL::reorder_type_alias() @@ -19577,8 +19634,8 @@ void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) } } -void CompilerGLSL::emit_non_semantic_shader_debug_info(uint32_t, uint32_t result_id, uint32_t eop, - const uint32_t *args, uint32_t) +void CompilerGLSL::emit_non_semantic_shader_debug_info(uint32_t, uint32_t result_id, uint32_t eop, const uint32_t *args, + uint32_t) { if (!options.emit_line_directives) return; @@ -19699,23 +19756,27 @@ bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const { const SPIRVariable *ret = nullptr; - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (has_decoration(var.self, DecorationInputAttachmentIndex) && - get_decoration(var.self, DecorationInputAttachmentIndex) == index) - { - ret = &var; - } 
- }); + ir.for_each_typed_id( + [&](uint32_t, const SPIRVariable &var) + { + if (has_decoration(var.self, DecorationInputAttachmentIndex) && + get_decoration(var.self, DecorationInputAttachmentIndex) == index) + { + ret = &var; + } + }); return ret; } const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const { const SPIRVariable *ret = nullptr; - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) - ret = &var; - }); + ir.for_each_typed_id( + [&](uint32_t, const SPIRVariable &var) + { + if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) + ret = &var; + }); return ret; } @@ -19734,19 +19795,21 @@ void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); auto &func = get(get_entry_point().self); - func.fixup_hooks_in.push_back([=]() { - if (is_legacy()) - { - statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", - get_decoration(output_var->self, DecorationLocation), "];"); - } - else - { - uint32_t num_rt_components = this->get(output_var->basetype).vecsize; - statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", - to_expression(output_var->self), ";"); - } - }); + func.fixup_hooks_in.push_back( + [=]() + { + if (is_legacy()) + { + statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", + get_decoration(output_var->self, DecorationLocation), "];"); + } + else + { + uint32_t num_rt_components = this->get(output_var->basetype).vecsize; + statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", + to_expression(output_var->self), ";"); + } + }); } } @@ -19842,8 +19905,7 @@ bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_witho true, // 
SubgroupBalloFindLSB_MSB false, false, false, false, true, // SubgroupMemBarrier - replaced with workgroup memory barriers - false, false, true, false, - false, false, false, false, false, false, // iadd, fadd + false, false, true, false, false, false, false, false, false, false, // iadd, fadd false, false, false, false, false, false, // imul , fmul }; @@ -19854,14 +19916,16 @@ CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgrou get_KHR_extension_for_feature(Feature feature) { static const Candidate extensions[FeatureCount] = { - KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, - KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, - KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, - KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, - KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, KHR_shader_subgroup_vote, + KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + KHR_shader_subgroup_arithmetic, }; return extensions[feature]; @@ -19911,7 +19975,8 @@ 
CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderS get_candidates_for_feature(Feature ft, const Result &r) { auto c = get_candidates_for_feature(ft); - auto cmp = [&r](Candidate a, Candidate b) { + auto cmp = [&r](Candidate a, Candidate b) + { if (r.weights[a] == r.weights[b]) return a < b; // Prefer candidates with lower enum value return r.weights[a] > r.weights[b]; @@ -20093,9 +20158,8 @@ bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) cons if (!has_decoration(var.self, DecorationLocation)) return false; - return is_stage_output_location_masked( - get_decoration(var.self, DecorationLocation), - get_decoration(var.self, DecorationComponent)); + return is_stage_output_location_masked(get_decoration(var.self, DecorationLocation), + get_decoration(var.self, DecorationComponent)); } } @@ -20154,7 +20218,8 @@ uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uin return get_accumulated_member_location(var, mbr_idx, strip_array); } -uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const +uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, + bool strip_array) const { auto &type = strip_array ? 
get_variable_element_type(var) : get_variable_data_type(var); uint32_t location = get_decoration(var.self, DecorationLocation); @@ -20244,17 +20309,17 @@ std::string CompilerGLSL::format_double(double value) const return convert_to_string(value, current_locale_radix_character); } -std::string CompilerGLSL::to_pretty_expression_if_int_constant( - uint32_t id, - const GlslConstantNameMapping *mapping_start, const GlslConstantNameMapping *mapping_end, - bool register_expression_read) +std::string CompilerGLSL::to_pretty_expression_if_int_constant(uint32_t id, + const GlslConstantNameMapping *mapping_start, + const GlslConstantNameMapping *mapping_end, + bool register_expression_read) { auto *c = maybe_get(id); if (c && !c->specialization) { auto value = c->scalar(); - auto pretty_name = std::find_if(mapping_start, mapping_end, - [value](const GlslConstantNameMapping &mapping) { return mapping.value == value; }); + auto pretty_name = std::find_if(mapping_start, mapping_end, [value](const GlslConstantNameMapping &mapping) + { return mapping.value == value; }); if (pretty_name != mapping_end) return pretty_name->alias; } @@ -20306,7 +20371,8 @@ uint32_t CompilerGLSL::get_fp_fast_math_flags_for_op(uint32_t result_type, uint3 // Legacy NoContraction deals with any kind of transform to the expression. if (id != 0 && has_decoration(id, DecorationNoContraction)) - fp_flags &= ~(FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask); + fp_flags &= + ~(FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask); // Handle float_controls2 execution modes. 
bool found_default = false; @@ -20332,8 +20398,7 @@ uint32_t CompilerGLSL::get_fp_fast_math_flags_for_op(uint32_t result_type, uint3 bool CompilerGLSL::has_legacy_nocontract(uint32_t result_type, uint32_t id) const { - const auto fp_flags = FPFastMathModeAllowContractMask | - FPFastMathModeAllowTransformMask | - FPFastMathModeAllowReassocMask; + const auto fp_flags = + FPFastMathModeAllowContractMask | FPFastMathModeAllowTransformMask | FPFastMathModeAllowReassocMask; return (get_fp_fast_math_flags_for_op(result_type, id) & fp_flags) != fp_flags; } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 24e34d7b0..70c93fcd4 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -668,6 +668,9 @@ class CompilerGLSL : public Compiler bool requires_relaxed_precision_analysis = false; bool implicit_c_integer_promotion_rules = false; bool supports_spec_constant_array_size = true; + // When non-empty, matrix column access uses this member name instead of raw array indexing. + // e.g., "columns" -> m.columns[i] instead of m[i]. 
+ std::string matrix_column_accessor; } backend; virtual void emit_struct(SPIRType &type); @@ -708,7 +711,7 @@ class CompilerGLSL : public Compiler void flush_undeclared_variables(SPIRBlock &block); void emit_variable_temporary_copies(const SPIRVariable &var); - bool should_dereference(uint32_t id); + virtual bool should_dereference(uint32_t id); bool should_dereference_caller_param(uint32_t id); bool should_forward(uint32_t id) const; bool should_suppress_usage_tracking(uint32_t id) const; diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 5a72f14e4..3aaaa6150 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -112,6 +112,7 @@ string CompilerOpenCL::compile() backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; backend.supports_spec_constant_array_size = false; + backend.matrix_column_accessor = "columns"; fixup_anonymous_struct_names(); fixup_type_alias(); @@ -124,17 +125,32 @@ string CompilerOpenCL::compile() set_enabled_interface_variables(get_active_interface_variables()); reorder_type_alias(); + // Pre-scan: discover all matrix types used in the IR so that typedefs + // and helpers can be emitted in the first pass without forcing a recompile. 
+ prepass_discover_matrix_types(); + uint32_t pass_count = 0; do { + auto prev_matrix_types = used_matrix_types; + auto prev_helpers = need_mul_mat_vec.size() + need_mul_vec_mat.size() + need_mul_mat_mat.size() + + need_mul_mat_scalar.size() + need_transpose.size() + need_outer_product.size(); + reset(pass_count); buffer.reset(); emit_header(); + emit_matrix_typedefs(); emit_specialization_constants_and_structs(); + emit_matrix_helpers(); emit_resources(); emit_function(get(ir.default_entry_point), Bitset()); + auto new_helpers = need_mul_mat_vec.size() + need_mul_vec_mat.size() + need_mul_mat_mat.size() + + need_mul_mat_scalar.size() + need_transpose.size() + need_outer_product.size(); + if (used_matrix_types != prev_matrix_types || new_helpers != prev_helpers) + force_recompile(); + pass_count++; } while (is_forcing_recompilation()); @@ -153,6 +169,8 @@ void CompilerOpenCL::emit_header() if (opencl_options.opencl_version >= 200) statement("#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable"); + if (opencl_options.enable_fp16) + statement("#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); if (opencl_options.enable_fp64) statement("#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); if (opencl_options.enable_64bit_atomics && opencl_options.opencl_version >= 200) @@ -490,6 +508,227 @@ void CompilerOpenCL::emit_resources() statement(""); } + // FindLSB polyfill: GLSL findLSB returns the bit position of the lowest set bit, or -1 if 0. + // OpenCL 2.0+ has ctz() but OpenCL 1.2 does not. Use (x & -x) to isolate lowest bit, + // then 31 - clz() to get its position. + if (needs_findlsb_polyfill) + { + statement("static int spvFindLSB(uint x) {"); + statement(" if (x == 0u) return -1;"); + statement(" return 31 - as_int(clz(x & (0u - x)));"); + statement("}"); + statement(""); + } + + // Pack/Unpack Snorm/Unorm polyfills. 
+ if (needs_pack_snorm_4x8) + { + statement("static uint spvPackSnorm4x8(float4 v) {"); + statement(" char4 packed = convert_char4_sat_rte(v * 127.0f);"); + statement(" return as_uint(packed);"); + statement("}"); + statement(""); + } + if (needs_pack_unorm_4x8) + { + statement("static uint spvPackUnorm4x8(float4 v) {"); + statement(" uchar4 packed = convert_uchar4_sat_rte(v * 255.0f);"); + statement(" return as_uint(packed);"); + statement("}"); + statement(""); + } + if (needs_pack_snorm_2x16) + { + statement("static uint spvPackSnorm2x16(float2 v) {"); + statement(" short2 packed = convert_short2_sat_rte(v * 32767.0f);"); + statement(" return as_uint(packed);"); + statement("}"); + statement(""); + } + if (needs_pack_unorm_2x16) + { + statement("static uint spvPackUnorm2x16(float2 v) {"); + statement(" ushort2 packed = convert_ushort2_sat_rte(v * 65535.0f);"); + statement(" return as_uint(packed);"); + statement("}"); + statement(""); + } + if (needs_unpack_snorm_4x8) + { + statement("static float4 spvUnpackSnorm4x8(uint v) {"); + statement(" char4 packed = as_char4(v);"); + statement(" return max(convert_float4(packed) / 127.0f, (float4)(-1.0f));"); + statement("}"); + statement(""); + } + if (needs_unpack_unorm_4x8) + { + statement("static float4 spvUnpackUnorm4x8(uint v) {"); + statement(" uchar4 packed = as_uchar4(v);"); + statement(" return convert_float4(packed) / 255.0f;"); + statement("}"); + statement(""); + } + if (needs_unpack_snorm_2x16) + { + statement("static float2 spvUnpackSnorm2x16(uint v) {"); + statement(" short2 packed = as_short2(v);"); + statement(" return max(convert_float2(packed) / 32767.0f, (float2)(-1.0f));"); + statement("}"); + statement(""); + } + if (needs_unpack_unorm_2x16) + { + statement("static float2 spvUnpackUnorm2x16(uint v) {"); + statement(" ushort2 packed = as_ushort2(v);"); + statement(" return convert_float2(packed) / 65535.0f;"); + statement("}"); + statement(""); + } + + // Determinant polyfills using struct-wrapped 
matrix types (unique names per size for C). + if (needs_determinant_2) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 2, 2); + statement("static float spvDeterminant2(", mat, " m) {"); + statement(" return m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x;"); + statement("}"); + statement(""); + } + if (needs_determinant_3) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 3, 3); + statement("static float spvDeterminant3(", mat, " m) {"); + statement(" return dot(m.columns[0], (float3)(" + "m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, " + "m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, " + "m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x));"); + statement("}"); + statement(""); + } + if (needs_determinant_4) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 4, 4); + statement("static float spvDeterminant4(", mat, " m) {"); + statement( + " return dot(m.columns[0], (float4)(" + "m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + " + "m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - " + "m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, " + "m.columns[3].x * m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - " + "m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + " + "m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, " + "m.columns[2].x * m.columns[3].y * m.columns[1].w - m.columns[3].x * m.columns[2].y * m.columns[1].w + " + "m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - " + "m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, " + "m.columns[3].x * m.columns[2].y * 
m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - " + "m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + " + "m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z));"); + statement("}"); + statement(""); + } + + // Matrix inverse polyfills. + if (needs_inverse_2) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 2, 2); + statement("static ", mat, " spvInverse2(", mat, " m) {"); + statement(" float d = 1.0f / (m.columns[0].x * m.columns[1].y - m.columns[1].x * m.columns[0].y);"); + statement(" return (", mat, + "){ { (float2)(m.columns[1].y * d, -m.columns[0].y * d), (float2)(-m.columns[1].x * d, " + "m.columns[0].x * d) } };"); + statement("}"); + statement(""); + } + if (needs_inverse_3) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 3, 3); + statement("static ", mat, " spvInverse3(", mat, " m) {"); + statement(" float3 t = (float3)(" + "m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, " + "m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, " + "m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x);"); + statement(" float d = 1.0f / dot(m.columns[0], t);"); + statement(" return (", mat, + "){ { t * d, " + "(float3)(m.columns[0].z * m.columns[2].y - m.columns[0].y * m.columns[2].z, " + "m.columns[0].x * m.columns[2].z - m.columns[0].z * m.columns[2].x, " + "m.columns[0].y * m.columns[2].x - m.columns[0].x * m.columns[2].y) * d, " + "(float3)(m.columns[0].y * m.columns[1].z - m.columns[0].z * m.columns[1].y, " + "m.columns[0].z * m.columns[1].x - m.columns[0].x * m.columns[1].z, " + "m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x) * d } };"); + statement("}"); + statement(""); + } + if (needs_inverse_4) + { + auto mat = opencl_matrix_type_name(SPIRType::Float, 4, 4); + statement("static ", mat, " spvInverse4(", mat, " m) {"); + statement( + " float4 t = (float4)(" + 
"m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + " + "m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - " + "m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, " + "m.columns[3].x * m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - " + "m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + " + "m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, " + "m.columns[2].x * m.columns[3].y * m.columns[1].w - m.columns[3].x * m.columns[2].y * m.columns[1].w + " + "m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - " + "m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, " + "m.columns[3].x * m.columns[2].y * m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - " + "m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + " + "m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z);"); + statement( + " ", mat, " r = (", mat, + "){ { " + "(float4)(t.x, " + "m.columns[3].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[3].z * m.columns[0].w - " + "m.columns[3].y * m.columns[0].z * m.columns[2].w + m.columns[0].y * m.columns[3].z * m.columns[2].w + " + "m.columns[2].y * m.columns[0].z * m.columns[3].w - m.columns[0].y * m.columns[2].z * m.columns[3].w, " + "m.columns[1].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[1].z * m.columns[0].w + " + "m.columns[3].y * m.columns[0].z * m.columns[1].w - m.columns[0].y * m.columns[3].z * m.columns[1].w - " + "m.columns[1].y * m.columns[0].z * m.columns[3].w + m.columns[0].y * m.columns[1].z * m.columns[3].w, " + 
"m.columns[2].y * m.columns[1].z * m.columns[0].w - m.columns[1].y * m.columns[2].z * m.columns[0].w - " + "m.columns[2].y * m.columns[0].z * m.columns[1].w + m.columns[0].y * m.columns[2].z * m.columns[1].w + " + "m.columns[1].y * m.columns[0].z * m.columns[2].w - m.columns[0].y * m.columns[1].z * m.columns[2].w), " + "(float4)(t.y, " + "m.columns[2].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[2].z * m.columns[0].w + " + "m.columns[3].x * m.columns[0].z * m.columns[2].w - m.columns[0].x * m.columns[3].z * m.columns[2].w - " + "m.columns[2].x * m.columns[0].z * m.columns[3].w + m.columns[0].x * m.columns[2].z * m.columns[3].w, " + "m.columns[3].x * m.columns[1].z * m.columns[0].w - m.columns[1].x * m.columns[3].z * m.columns[0].w - " + "m.columns[3].x * m.columns[0].z * m.columns[1].w + m.columns[0].x * m.columns[3].z * m.columns[1].w + " + "m.columns[1].x * m.columns[0].z * m.columns[3].w - m.columns[0].x * m.columns[1].z * m.columns[3].w, " + "m.columns[1].x * m.columns[2].z * m.columns[0].w - m.columns[2].x * m.columns[1].z * m.columns[0].w + " + "m.columns[2].x * m.columns[0].z * m.columns[1].w - m.columns[0].x * m.columns[2].z * m.columns[1].w - " + "m.columns[1].x * m.columns[0].z * m.columns[2].w + m.columns[0].x * m.columns[1].z * m.columns[2].w), " + "(float4)(t.z, " + "m.columns[3].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[3].y * m.columns[0].w - " + "m.columns[3].x * m.columns[0].y * m.columns[2].w + m.columns[0].x * m.columns[3].y * m.columns[2].w + " + "m.columns[2].x * m.columns[0].y * m.columns[3].w - m.columns[0].x * m.columns[2].y * m.columns[3].w, " + "m.columns[1].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[1].y * m.columns[0].w + " + "m.columns[3].x * m.columns[0].y * m.columns[1].w - m.columns[0].x * m.columns[3].y * m.columns[1].w - " + "m.columns[1].x * m.columns[0].y * m.columns[3].w + m.columns[0].x * m.columns[1].y * m.columns[3].w, " + "m.columns[2].x * m.columns[1].y * 
m.columns[0].w - m.columns[1].x * m.columns[2].y * m.columns[0].w - " + "m.columns[2].x * m.columns[0].y * m.columns[1].w + m.columns[0].x * m.columns[2].y * m.columns[1].w + " + "m.columns[1].x * m.columns[0].y * m.columns[2].w - m.columns[0].x * m.columns[1].y * m.columns[2].w), " + "(float4)(t.w, " + "m.columns[2].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[2].y * m.columns[0].z + " + "m.columns[3].x * m.columns[0].y * m.columns[2].z - m.columns[0].x * m.columns[3].y * m.columns[2].z - " + "m.columns[2].x * m.columns[0].y * m.columns[3].z + m.columns[0].x * m.columns[2].y * m.columns[3].z, " + "m.columns[3].x * m.columns[1].y * m.columns[0].z - m.columns[1].x * m.columns[3].y * m.columns[0].z - " + "m.columns[3].x * m.columns[0].y * m.columns[1].z + m.columns[0].x * m.columns[3].y * m.columns[1].z + " + "m.columns[1].x * m.columns[0].y * m.columns[3].z - m.columns[0].x * m.columns[1].y * m.columns[3].z, " + "m.columns[1].x * m.columns[2].y * m.columns[0].z - m.columns[2].x * m.columns[1].y * m.columns[0].z + " + "m.columns[2].x * m.columns[0].y * m.columns[1].z - m.columns[0].x * m.columns[2].y * m.columns[1].z - " + "m.columns[1].x * m.columns[0].y * m.columns[2].z + m.columns[0].x * m.columns[1].y * m.columns[2].z) } " + "};"); + statement(" float d = 1.0f / dot(m.columns[0], t);"); + statement(" r.columns[0] *= d; r.columns[1] *= d; r.columns[2] *= d; r.columns[3] *= d;"); + statement(" return r;"); + statement("}"); + statement(""); + } + // Default sampler for combined image+sampler usage (OpenCL C requires file-scope const sampler_t). 
if (needs_default_sampler) { @@ -1008,6 +1247,8 @@ string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id, bool memb type_name = "ulong"; break; case SPIRType::Half: + if (!opencl_options.enable_fp16) + SPIRV_CROSS_THROW("Half requires cl_khr_fp16."); type_name = "half"; break; case SPIRType::Float: @@ -1023,6 +1264,13 @@ string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id, bool memb return "unknown_type"; } + // Matrix? (columns > 1) + if (type.columns > 1) + { + used_matrix_types.insert(make_matrix_key(type)); + return opencl_matrix_type_name(type); + } + // Vector? if (type.vecsize > 1) type_name += to_string(type.vecsize); @@ -1035,6 +1283,366 @@ string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id) return type_to_glsl(type, id, false); } +CompilerOpenCL::MatrixTypeKey CompilerOpenCL::make_matrix_key(const SPIRType &type) +{ + return { type.basetype, type.vecsize, type.columns }; +} + +string CompilerOpenCL::opencl_column_type_name(SPIRType::BaseType basetype, uint32_t vecsize) +{ + string name; + switch (basetype) + { + case SPIRType::Float: + name = "float"; + break; + case SPIRType::Double: + name = "double"; + break; + case SPIRType::Half: + name = "half"; + break; + default: + name = "float"; + break; + } + if (vecsize > 1) + name += to_string(vecsize); + return name; +} + +string CompilerOpenCL::opencl_matrix_type_name(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + string prefix = "spv"; + if (basetype == SPIRType::Double) + prefix += "D"; + else if (basetype == SPIRType::Half) + prefix += "H"; + prefix += "Mat"; + if (columns == vecsize) + return prefix + to_string(columns); + return prefix + to_string(columns) + "x" + to_string(vecsize); +} + +string CompilerOpenCL::opencl_matrix_type_name(const SPIRType &type) +{ + return opencl_matrix_type_name(type.basetype, type.vecsize, type.columns); +} + +string CompilerOpenCL::opencl_matrix_short_name(SPIRType::BaseType basetype, uint32_t 
vecsize, uint32_t columns) +{ + // Returns e.g. "Mat4", "DMat4", "HMat4", "Mat4x3", "DMat4x3" + string prefix; + if (basetype == SPIRType::Double) + prefix = "D"; + else if (basetype == SPIRType::Half) + prefix = "H"; + prefix += "Mat"; + if (columns == vecsize) + return prefix + to_string(columns); + return prefix + to_string(columns) + "x" + to_string(vecsize); +} + +string CompilerOpenCL::opencl_vector_short_name(SPIRType::BaseType basetype, uint32_t vecsize) +{ + // Returns e.g. "Vec4", "DVec4", "HVec4", "Scalar" for vecsize 1 + if (vecsize == 1) + { + if (basetype == SPIRType::Double) + return "DScalar"; + if (basetype == SPIRType::Half) + return "HScalar"; + return "Scalar"; + } + string prefix; + if (basetype == SPIRType::Double) + prefix = "D"; + else if (basetype == SPIRType::Half) + prefix = "H"; + return prefix + "Vec" + to_string(vecsize); +} + +void CompilerOpenCL::prepass_discover_matrix_types() +{ + used_matrix_types.clear(); + need_mul_mat_vec.clear(); + need_mul_vec_mat.clear(); + need_mul_mat_mat.clear(); + need_mul_mat_scalar.clear(); + need_transpose.clear(); + need_outer_product.clear(); + + // Scan all types for matrix members. + ir.for_each_typed_id( + [&](uint32_t, SPIRType &type) + { + if (type.columns > 1 && type.basetype != SPIRType::Struct) + used_matrix_types.insert(make_matrix_key(type)); + for (auto &member_type_id : type.member_types) + { + auto &member_type = get(member_type_id); + if (member_type.columns > 1) + used_matrix_types.insert(make_matrix_key(member_type)); + } + }); + + // Scan all instructions for matrix operations to discover helpers needed. + // We can resolve the matrix type from the SPIR-V type of operands at pre-scan time. + auto get_id_type = [&](uint32_t id) -> const SPIRType & + { + // For value IDs, look up the type from variable, constant, or the instruction result. 
+ auto *var = maybe_get(id); + if (var) + return get_variable_data_type(*var); + auto *c = maybe_get(id); + if (c) + return get(c->constant_type); + // For instruction results, the type is stored in the expression or type_id. + if (ir.ids[id].get_type() == TypeExpression) + return get(get(id).expression_type); + // For types themselves + if (ir.ids[id].get_type() == TypeType) + return get(id); + // Fallback: check if there's a result type mapping + return get(id); + }; + + ir.for_each_typed_id( + [&](uint32_t, SPIRFunction &f) + { + for (auto &block_id : f.blocks) + { + auto &block = get(block_id); + for (auto &instruction : block.ops) + { + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); + + // Helper lambda to resolve the type of a SPIR-V value ID from the instruction. + // For OpMatrixTimesVector etc., ops[2] and ops[3] are value IDs whose types + // may not be directly available at pre-scan time. Instead, we check the + // instruction result type to infer what's needed. + switch (opcode) + { + case OpMatrixTimesVector: + { + // ops[0] = result type (vector), ops[2] = matrix, ops[3] = vector + // The matrix type is not directly available from ops[2] here. + // We infer from the result: result is vec(vecsize), matrix has same vecsize. + // But we need the column count too. Let's look up the variable type. + // At pre-scan time, not all IDs have resolved types, so we'll rely on + // the recompile mechanism for helpers that can't be pre-discovered. 
+ break; + } + case OpOuterProduct: + { + auto &res_type = get(ops[0]); + if (res_type.columns > 1) + { + used_matrix_types.insert(make_matrix_key(res_type)); + auto col_short = opencl_vector_short_name(res_type.basetype, res_type.vecsize); + auto row_short = opencl_vector_short_name(res_type.basetype, res_type.columns); + (void)col_short; + (void)row_short; + need_outer_product.insert(make_matrix_key(res_type)); + } + break; + } + case OpTranspose: + { + auto &res_type = get(ops[0]); + if (res_type.columns > 1) + { + used_matrix_types.insert(make_matrix_key(res_type)); + // The input type has swapped dimensions. + MatrixTypeKey input_key = { res_type.basetype, res_type.columns, res_type.vecsize }; + used_matrix_types.insert(input_key); + need_transpose.insert(input_key); + } + break; + } + case OpMatrixTimesScalar: + case OpMatrixTimesMatrix: + case OpVectorTimesMatrix: + // These will be discovered during emit_instruction and trigger recompile if needed. + break; + default: + break; + } + } + } + }); +} + +void CompilerOpenCL::emit_matrix_typedefs() +{ + if (used_matrix_types.empty()) + return; + + for (auto &key : used_matrix_types) + { + auto col_type = opencl_column_type_name(key.basetype, key.vecsize); + auto mat_name = opencl_matrix_type_name(key.basetype, key.vecsize, key.columns); + statement("typedef struct { ", col_type, " columns[", key.columns, "]; } ", mat_name, ";"); + } + statement(""); +} + +void CompilerOpenCL::emit_matrix_helpers() +{ + for (auto &key : need_mul_mat_vec) + emit_mul_mat_vec_helper(key.basetype, key.vecsize, key.columns); + for (auto &key : need_mul_vec_mat) + emit_mul_vec_mat_helper(key.basetype, key.vecsize, key.columns); + for (auto &key : need_mul_mat_mat) + emit_mul_mat_mat_helper(key.first, key.second); + for (auto &key : need_mul_mat_scalar) + emit_mul_mat_scalar_helper(key.basetype, key.vecsize, key.columns); + for (auto &key : need_transpose) + emit_transpose_helper(key.basetype, key.vecsize, key.columns); + for (auto &key 
: need_outer_product) + emit_outer_product_helper(key.basetype, key.vecsize, key.columns); +} + +void CompilerOpenCL::emit_mul_mat_vec_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + auto mat_type = opencl_matrix_type_name(basetype, vecsize, columns); + auto vec_result = opencl_column_type_name(basetype, vecsize); + auto vec_arg = opencl_column_type_name(basetype, columns); + auto mat_short = opencl_matrix_short_name(basetype, vecsize, columns); + auto vec_short = opencl_vector_short_name(basetype, columns); + string func_name = "spvMul" + mat_short + vec_short; + + statement("static ", vec_result, " ", func_name, "(", mat_type, " m, ", vec_arg, " v)"); + begin_scope(); + string expr = "return "; + const char *swizzles[] = { "x", "y", "z", "w", "s4", "s5", "s6", "s7", + "s8", "s9", "sa", "sb", "sc", "sd", "se", "sf" }; + for (uint32_t i = 0; i < columns; i++) + { + if (i > 0) + expr += " + "; + expr += "m.columns[" + to_string(i) + "]"; + if (columns > 1) + expr += string(" * v.") + swizzles[i]; + } + expr += ";"; + statement(expr); + end_scope(); + statement(""); +} + +void CompilerOpenCL::emit_mul_vec_mat_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + auto mat_type = opencl_matrix_type_name(basetype, vecsize, columns); + auto in_vec = opencl_column_type_name(basetype, vecsize); + auto out_vec = opencl_column_type_name(basetype, columns); + auto vec_short = opencl_vector_short_name(basetype, vecsize); + auto mat_short = opencl_matrix_short_name(basetype, vecsize, columns); + string func_name = "spvMul" + vec_short + mat_short; + + statement("static ", out_vec, " ", func_name, "(", in_vec, " v, ", mat_type, " m)"); + begin_scope(); + string expr = "return (" + out_vec + ")("; + for (uint32_t i = 0; i < columns; i++) + { + if (i > 0) + expr += ", "; + expr += "dot(v, m.columns[" + to_string(i) + "])"; + } + expr += ");"; + statement(expr); + end_scope(); + statement(""); +} + +void 
CompilerOpenCL::emit_mul_mat_mat_helper(const MatrixTypeKey &a, const MatrixTypeKey &b) +{ + auto mat_a_type = opencl_matrix_type_name(a.basetype, a.vecsize, a.columns); + auto mat_b_type = opencl_matrix_type_name(b.basetype, b.vecsize, b.columns); + auto result_type = opencl_matrix_type_name(a.basetype, a.vecsize, b.columns); + auto mat_a_short = opencl_matrix_short_name(a.basetype, a.vecsize, a.columns); + auto mat_b_short = opencl_matrix_short_name(b.basetype, b.vecsize, b.columns); + string func_name = "spvMul" + mat_a_short + mat_b_short; + + auto mv_vec_short = opencl_vector_short_name(a.basetype, a.columns); + string mul_mv_func = "spvMul" + mat_a_short + mv_vec_short; + + statement("static ", result_type, " ", func_name, "(", mat_a_type, " a, ", mat_b_type, " b)"); + begin_scope(); + statement(result_type, " r;"); + for (uint32_t i = 0; i < b.columns; i++) + statement("r.columns[", i, "] = ", mul_mv_func, "(a, b.columns[", i, "]);"); + statement("return r;"); + end_scope(); + statement(""); +} + +void CompilerOpenCL::emit_mul_mat_scalar_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + auto mat_type = opencl_matrix_type_name(basetype, vecsize, columns); + auto scalar_type = opencl_column_type_name(basetype, 1); + auto mat_short = opencl_matrix_short_name(basetype, vecsize, columns); + string func_name = "spvMul" + mat_short + "Scalar"; + + statement("static ", mat_type, " ", func_name, "(", mat_type, " m, ", scalar_type, " s)"); + begin_scope(); + statement(mat_type, " r;"); + for (uint32_t i = 0; i < columns; i++) + statement("r.columns[", i, "] = m.columns[", i, "] * s;"); + statement("return r;"); + end_scope(); + statement(""); +} + +void CompilerOpenCL::emit_transpose_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + auto in_type = opencl_matrix_type_name(basetype, vecsize, columns); + auto out_type = opencl_matrix_type_name(basetype, columns, vecsize); + auto in_short = 
opencl_matrix_short_name(basetype, vecsize, columns); + string func_name = "spvTranspose" + in_short; + const char *swizzles[] = { "x", "y", "z", "w" }; + + statement("static ", out_type, " ", func_name, "(", in_type, " m)"); + begin_scope(); + statement(out_type, " r;"); + for (uint32_t i = 0; i < vecsize; i++) + { + string expr = "r.columns[" + to_string(i) + "] = (" + opencl_column_type_name(basetype, columns) + ")("; + for (uint32_t j = 0; j < columns; j++) + { + if (j > 0) + expr += ", "; + expr += "m.columns[" + to_string(j) + "]." + swizzles[i]; + } + expr += ");"; + statement(expr); + } + statement("return r;"); + end_scope(); + statement(""); +} + +void CompilerOpenCL::emit_outer_product_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns) +{ + auto mat_type = opencl_matrix_type_name(basetype, vecsize, columns); + auto col_type = opencl_column_type_name(basetype, vecsize); + auto row_type = opencl_column_type_name(basetype, columns); + auto col_short = opencl_vector_short_name(basetype, vecsize); + auto row_short = opencl_vector_short_name(basetype, columns); + string func_name = "spvOuterProduct" + col_short + row_short; + const char *swizzles[] = { "x", "y", "z", "w" }; + + statement("static ", mat_type, " ", func_name, "(", col_type, " c, ", row_type, " r)"); + begin_scope(); + statement(mat_type, " m;"); + for (uint32_t i = 0; i < columns; i++) + statement("m.columns[", i, "] = c * r.", swizzles[i], ";"); + statement("return m;"); + end_scope(); + statement(""); +} + string CompilerOpenCL::image_type_glsl(const SPIRType &type, uint32_t id, bool member) { (void)member; @@ -1122,6 +1730,46 @@ uint32_t CompilerOpenCL::get_physical_type_id_stride(TypeID type_id) const return vecsize * type.columns * (type.width / 8u); } +bool CompilerOpenCL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // OpenCL backend uses struct-wrapped matrices with transpose helpers, + // so we can handle non-square row-major 
matrices (unlike the base GLSL class). + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + const auto mbr_type = get(type.member_types[index]); + if (mbr_type.columns <= 1) + return false; + + return true; +} + +string CompilerOpenCL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed, bool relaxed) +{ + strip_enclosed_expression(exp_str); + if (!is_matrix(exp_type)) + { + // Column access from a row-major matrix — delegate to base class unrolling. + return CompilerGLSL::convert_row_major_matrix(std::move(exp_str), exp_type, physical_type_id, is_packed, + relaxed); + } + + // Full matrix transpose: use our spvTranspose helper. + // The expression string is in the physical (transposed) layout. + // exp_type is the SPIR-V logical type. The physical type has swapped dimensions. + // We transpose FROM physical TO logical: spvTranspose_PhysType_(phys_expr) -> logical_type + uint32_t phys_vecsize = exp_type.columns; + uint32_t phys_columns = exp_type.vecsize; + auto phys_short = opencl_matrix_short_name(exp_type.basetype, phys_vecsize, phys_columns); + MatrixTypeKey phys_key = { exp_type.basetype, phys_vecsize, phys_columns }; + need_transpose.insert(phys_key); + used_matrix_types.insert(phys_key); + used_matrix_types.insert(make_matrix_key(exp_type)); + + return join("spvTranspose", phys_short, "(", exp_str, ")"); +} + std::string CompilerOpenCL::type_to_glsl_constructor(const SPIRType &type) { string ret = CompilerGLSL::type_to_glsl_constructor(type); @@ -1136,20 +1784,40 @@ std::string CompilerOpenCL::constant_expression(const SPIRConstant &c, bool insi bool inside_struct_scope) { auto &type = get(c.constant_type); - if (c.replicated && type.op != OpTypeArray) + + // Matrix constant: emit as struct compound literal. 
+ if (type.columns > 1) { - auto sub_expr = to_expression(c.subconstants[0]); - if (type.op == OpTypeMatrix) + auto mat_name = opencl_matrix_type_name(type); + string expr = "(" + mat_name + "){ { "; + if (c.replicated) { - // OpenCL C has no native matrix type; matrices are represented as their column vector type. - // For a replicated matrix constant, just use the column value directly. - return sub_expr; + auto sub_expr = to_expression(c.subconstants[0]); + for (uint32_t i = 0; i < type.columns; i++) + { + if (i > 0) + expr += ", "; + expr += sub_expr; + } } else { - // Vector replicate: (float4)(scalar) - return join(type_to_glsl_constructor(type), "(", sub_expr, ")"); + for (uint32_t i = 0; i < type.columns; i++) + { + if (i > 0) + expr += ", "; + expr += constant_expression_vector(c, i); + } } + expr += " } }"; + return expr; + } + + if (c.replicated && type.op != OpTypeArray) + { + auto sub_expr = to_expression(c.subconstants[0]); + // Vector replicate: (float4)(scalar) + return join(type_to_glsl_constructor(type), "(", sub_expr, ")"); } return CompilerGLSL::constant_expression(c, inside_block_like_struct_scope, inside_struct_scope); } @@ -1251,105 +1919,531 @@ void CompilerOpenCL::emit_glsl_op(uint32_t result_type, uint32_t result_id, uint expr = join(bitcast_glsl_op(out_type, abs_ret_type), "(", expr, ")"); } - emit_op(result_type, result_id, expr, should_forward(args[0])); - inherit_expression_dependencies(result_id, args[0]); + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450SSign: + { + // OpenCL has no integer sign(). Use clamp(x, -1, 1). 
+ auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + + auto expected_basetype = to_signed_basetype(expr_type.width); + string input_expr; + if (expr_type.basetype != expected_basetype) + input_expr = bitcast_expression(expected_basetype, args[0]); + else + input_expr = to_expression(args[0]); + + string expr = join("clamp(", input_expr, ", -1, 1)"); + + // Cast to result type if needed (e.g. result is unsigned). + if (out_type.basetype != expected_basetype) + { + SPIRType signed_type = out_type; + signed_type.basetype = expected_basetype; + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450FindSMsb: + { + // GLSL findMSB for signed: position of highest bit that differs from sign bit. + // OpenCL: (W-1) - clz(x ^ (x >> (W-1))) + // x >> (W-1) is arithmetic shift: 0 for positive, -1 for negative. + // x ^ -1 = ~x, x ^ 0 = x. So this gives clz(x) for positive, clz(~x) for negative. + auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + uint32_t width = expr_type.width; + + // Input must be signed for arithmetic right shift. + auto signed_basetype = to_signed_basetype(width); + SPIRType signed_type = expr_type; + signed_type.basetype = signed_basetype; + + string input_expr; + if (expr_type.basetype != signed_basetype) + input_expr = bitcast_expression(signed_basetype, args[0]); + else + input_expr = to_enclosed_expression(args[0]); + + string xor_expr = join(input_expr, " ^ (", input_expr, " >> ", width - 1, ")"); + string expr = join(width - 1, " - clz(", xor_expr, ")"); + + // clz on signed type returns signed, so result is signed. Cast if output is unsigned. 
+ if (out_type.basetype != signed_basetype) + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450FindUMsb: + { + // GLSL findMSB for unsigned: position of highest set bit, -1 for 0. + // OpenCL: (W-1) - clz(x). clz(0) = W, so result = -1 for 0. + auto &expr_type = expression_type(args[0]); + auto &out_type = get(result_type); + uint32_t width = expr_type.width; + + auto unsigned_basetype = to_unsigned_basetype(width); + string input_expr; + if (expr_type.basetype != unsigned_basetype) + input_expr = bitcast_expression(unsigned_basetype, args[0]); + else + input_expr = to_expression(args[0]); + + // Cast to signed for the subtraction so result can be -1. + auto signed_basetype = to_signed_basetype(width); + SPIRType signed_type = out_type; + signed_type.basetype = signed_basetype; + string clz_expr = join("as_", type_to_glsl(signed_type), "(clz(", input_expr, "))"); + + string expr = join(width - 1, " - ", clz_expr); + + // findMSB returns int (signed). Cast if output type differs. + if (out_type.basetype != signed_basetype) + { + expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, result_id, args[0], "rsqrt"); + break; + + case GLSLstd450RoundEven: + emit_unary_func_op(result_type, result_id, args[0], "rint"); + break; + + case GLSLstd450Fract: + { + // OpenCL fract() requires a pointer argument. Use (x - floor(x)) inline. 
+ auto expr = join("(", to_expression(args[0]), " - floor(", to_expression(args[0]), "))"); + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + break; + } + + case GLSLstd450Atan2: + emit_binary_func_op(result_type, result_id, args[0], args[1], "atan2"); + break; + + case GLSLstd450Radians: + emit_unary_func_op(result_type, result_id, args[0], "radians"); + break; + + case GLSLstd450Degrees: + emit_unary_func_op(result_type, result_id, args[0], "degrees"); + break; + + case GLSLstd450FindILsb: + { + if (!needs_findlsb_polyfill) + { + needs_findlsb_polyfill = true; + force_recompile(); + } + auto &input_type = expression_type(args[0]); + auto &out_type = get(result_type); + // spvFindLSB takes uint. Cast input to uint if signed, and handle vector by component. + if (input_type.vecsize > 1) + { + // Vector: apply per-component using .s0, .s1, etc. + string expr = "(" + type_to_glsl(out_type) + ")("; + const char *swizzles[] = { "x", "y", "z", "w" }; + for (uint32_t i = 0; i < input_type.vecsize; i++) + { + if (i > 0) + expr += ", "; + if (input_type.basetype == SPIRType::Int) + expr += join("spvFindLSB(as_uint(", to_expression(args[0]), ".", swizzles[i], "))"); + else + expr += join("spvFindLSB(", to_expression(args[0]), ".", swizzles[i], ")"); + } + expr += ")"; + emit_op(result_type, result_id, expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + } + else + { + string input_expr; + if (input_type.basetype == SPIRType::Int) + input_expr = join("spvFindLSB(as_uint(", to_expression(args[0]), "))"); + else + input_expr = join("spvFindLSB(", to_expression(args[0]), ")"); + emit_op(result_type, result_id, input_expr, should_forward(args[0])); + inherit_expression_dependencies(result_id, args[0]); + } + break; + } + + case GLSLstd450FaceForward: + { + // OpenCL C has no faceforward(). Implement inline. + // faceforward(N, I, Nref) = dot(Nref, I) < 0 ? 
N : -N + auto &type = get(result_type); + if (type.vecsize == 1) + { + auto expr = join("(", to_expression(args[2]), " * ", to_expression(args[1]), " < 0.0f ? ", + to_expression(args[0]), " : -", to_expression(args[0]), ")"); + emit_op(result_type, result_id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + } + else + { + auto expr = join("(dot(", to_expression(args[2]), ", ", to_expression(args[1]), ") < 0.0f ? ", + to_expression(args[0]), " : -", to_expression(args[0]), ")"); + emit_op(result_type, result_id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + } + for (uint32_t i = 0; i < 3; i++) + inherit_expression_dependencies(result_id, args[i]); + break; + } + + case GLSLstd450Reflect: + { + // OpenCL C has no reflect(). Implement inline. + // reflect(I, N) = I - 2 * dot(N, I) * N + auto &type = get(result_type); + if (type.vecsize == 1) + { + auto expr = join(to_enclosed_expression(args[0]), " - 2.0f * ", to_enclosed_expression(args[1]), " * ", + to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1])); + emit_op(result_type, result_id, expr, should_forward(args[0]) && should_forward(args[1])); + } + else + { + auto expr = join(to_expression(args[0]), " - 2.0f * dot(", to_expression(args[1]), ", ", + to_expression(args[0]), ") * ", to_expression(args[1])); + emit_op(result_type, result_id, expr, should_forward(args[0]) && should_forward(args[1])); + } + inherit_expression_dependencies(result_id, args[0]); + inherit_expression_dependencies(result_id, args[1]); + break; + } + + case GLSLstd450Refract: + { + // OpenCL C has no refract(). Implement inline. + // refract(I, N, eta): k = 1 - eta^2*(1 - dot(N,I)^2); k < 0 ? 
0 : eta*I - (eta*dot(N,I)+sqrt(k))*N + auto &type = get(result_type); + forced_temporaries.insert(result_id); + auto type_name = type_to_glsl(type); + emit_op(result_type, result_id, join("(", type_name, ")(0.0f)"), false); + auto I = to_expression(args[0]); + auto N = to_expression(args[1]); + auto eta = to_expression(args[2]); + auto res = to_expression(result_id); + statement("{"); + if (type.vecsize == 1) + { + statement(" float spv_NdotI = ", N, " * ", I, ";"); + } + else + { + statement(" float spv_NdotI = dot(", N, ", ", I, ");"); + } + statement(" float spv_k = 1.0f - ", eta, " * ", eta, " * (1.0f - spv_NdotI * spv_NdotI);"); + statement(" if (spv_k >= 0.0f)"); + statement(" ", res, " = ", eta, " * ", I, " - (", eta, " * spv_NdotI + sqrt(spv_k)) * ", N, ";"); + statement("}"); + break; + } + + case GLSLstd450Length: + { + auto &type = expression_type(args[0]); + if (type.vecsize == 1) + emit_unary_func_op(result_type, result_id, args[0], "fabs"); + else + emit_unary_func_op(result_type, result_id, args[0], "length"); + break; + } + + case GLSLstd450Distance: + { + auto &type = expression_type(args[0]); + if (type.vecsize == 1) + { + auto expr = join("fabs(", to_expression(args[0]), " - ", to_expression(args[1]), ")"); + emit_op(result_type, result_id, expr, should_forward(args[0]) && should_forward(args[1])); + inherit_expression_dependencies(result_id, args[0]); + inherit_expression_dependencies(result_id, args[1]); + } + else + emit_binary_func_op(result_type, result_id, args[0], args[1], "distance"); + break; + } + + case GLSLstd450Normalize: + { + auto &type = expression_type(args[0]); + if (type.vecsize == 1) + emit_unary_func_op(result_type, result_id, args[0], "sign"); + else + emit_unary_func_op(result_type, result_id, args[0], "normalize"); + break; + } + + case GLSLstd450PackSnorm4x8: + if (!needs_pack_snorm_4x8) + { + needs_pack_snorm_4x8 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvPackSnorm4x8"); + 
break; + case GLSLstd450PackUnorm4x8: + if (!needs_pack_unorm_4x8) + { + needs_pack_unorm_4x8 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvPackUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + if (!needs_pack_snorm_2x16) + { + needs_pack_snorm_2x16 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvPackSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + if (!needs_pack_unorm_2x16) + { + needs_pack_unorm_2x16 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvPackUnorm2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + if (!needs_unpack_snorm_4x8) + { + needs_unpack_snorm_4x8 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvUnpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + if (!needs_unpack_unorm_4x8) + { + needs_unpack_unorm_4x8 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvUnpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + if (!needs_unpack_snorm_2x16) + { + needs_unpack_snorm_2x16 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvUnpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + if (!needs_unpack_unorm_2x16) + { + needs_unpack_unorm_2x16 = true; + force_recompile(); + } + emit_unary_func_op(result_type, result_id, args[0], "spvUnpackUnorm2x16"); + break; + + case GLSLstd450Determinant: + { + auto *e = maybe_get(args[0]); + bool old_transpose = e && e->need_transpose; + if (old_transpose) + e->need_transpose = false; + + auto &type = expression_type(args[0]); + assert(type.vecsize == type.columns); + const char *func = "spvDeterminant2"; + if (type.vecsize == 2) + { + if (!needs_determinant_2) + { + needs_determinant_2 = true; + force_recompile(); + } + } + else if (type.vecsize == 3) + { + func = "spvDeterminant3"; + if (!needs_determinant_3) + { + needs_determinant_3 
= true; + force_recompile(); + } + } + else if (type.vecsize == 4) + { + func = "spvDeterminant4"; + if (!needs_determinant_4) + { + needs_determinant_4 = true; + force_recompile(); + } + } + + emit_unary_func_op(result_type, result_id, args[0], func); + + if (old_transpose) + e->need_transpose = true; break; } - case GLSLstd450SSign: + case GLSLstd450MatrixInverse: { - // OpenCL has no integer sign(). Use clamp(x, -1, 1). - auto &expr_type = expression_type(args[0]); - auto &out_type = get(result_type); - - auto expected_basetype = to_signed_basetype(expr_type.width); - string input_expr; - if (expr_type.basetype != expected_basetype) - input_expr = bitcast_expression(expected_basetype, args[0]); - else - input_expr = to_expression(args[0]); - - string expr = join("clamp(", input_expr, ", -1, 1)"); + auto *a = maybe_get(args[0]); + bool old_transpose = a && a->need_transpose; + if (old_transpose) + a->need_transpose = false; - // Cast to result type if needed (e.g. result is unsigned). 
- if (out_type.basetype != expected_basetype) + auto &type = get(result_type); + assert(type.vecsize == type.columns); + const char *inv_func = "spvInverse2"; + if (type.vecsize == 2) { - SPIRType signed_type = out_type; - signed_type.basetype = expected_basetype; - expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + if (!needs_inverse_2) + { + needs_inverse_2 = true; + force_recompile(); + } + } + else if (type.vecsize == 3) + { + inv_func = "spvInverse3"; + if (!needs_inverse_3) + { + needs_inverse_3 = true; + force_recompile(); + } + } + else if (type.vecsize == 4) + { + inv_func = "spvInverse4"; + if (!needs_inverse_4) + { + needs_inverse_4 = true; + force_recompile(); + } } - emit_op(result_type, result_id, expr, should_forward(args[0])); + bool forward = should_forward(args[0]); + auto &expr_out = + emit_op(result_type, result_id, join(inv_func, "(", to_unpacked_expression(args[0]), ")"), forward); inherit_expression_dependencies(result_id, args[0]); + + if (old_transpose) + { + expr_out.need_transpose = true; + a->need_transpose = true; + } break; } - case GLSLstd450FindSMsb: - { - // GLSL findMSB for signed: position of highest bit that differs from sign bit. - // OpenCL: (W-1) - clz(x ^ (x >> (W-1))) - // x >> (W-1) is arithmetic shift: 0 for positive, -1 for negative. - // x ^ -1 = ~x, x ^ 0 = x. So this gives clz(x) for positive, clz(~x) for negative. - auto &expr_type = expression_type(args[0]); - auto &out_type = get(result_type); - uint32_t width = expr_type.width; - - // Input must be signed for arithmetic right shift. 
- auto signed_basetype = to_signed_basetype(width); - SPIRType signed_type = expr_type; - signed_type.basetype = signed_basetype; - - string input_expr; - if (expr_type.basetype != signed_basetype) - input_expr = bitcast_expression(signed_basetype, args[0]); - else - input_expr = to_enclosed_expression(args[0]); - - string xor_expr = join(input_expr, " ^ (", input_expr, " >> ", width - 1, ")"); - string expr = join(width - 1, " - clz(", xor_expr, ")"); - - // clz on signed type returns signed, so result is signed. Cast if output is unsigned. - if (out_type.basetype != signed_basetype) - expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); + // NMin / NMax / NClamp: OpenCL fmin/fmax propagate NaN correctly, use them directly. + case GLSLstd450NMin: + emit_binary_func_op(result_type, result_id, args[0], args[1], "fmin"); + break; + case GLSLstd450NMax: + emit_binary_func_op(result_type, result_id, args[0], args[1], "fmax"); + break; + case GLSLstd450NClamp: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + break; - emit_op(result_type, result_id, expr, should_forward(args[0])); - inherit_expression_dependencies(result_id, args[0]); + case GLSLstd450Frexp: + { + // OpenCL frexp signature matches GLSL: frexp(x, &exp) + register_call_out_argument(args[1]); + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, join("frexp(", to_expression(args[0]), ", &", to_expression(args[1]), ")"), + false); break; } - case GLSLstd450FindUMsb: + case GLSLstd450FrexpStruct: { - // GLSL findMSB for unsigned: position of highest set bit, -1 for 0. - // OpenCL: (W-1) - clz(x). clz(0) = W, so result = -1 for 0. 
- auto &expr_type = expression_type(args[0]); - auto &out_type = get(result_type); - uint32_t width = expr_type.width; + auto &type = get(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + statement(to_expression(result_id), ".", to_member_name(type, 0), " = frexp(", to_expression(args[0]), ", &", + to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } - auto unsigned_basetype = to_unsigned_basetype(width); - string input_expr; - if (expr_type.basetype != unsigned_basetype) - input_expr = bitcast_expression(unsigned_basetype, args[0]); - else - input_expr = to_expression(args[0]); + case GLSLstd450Ldexp: + emit_binary_func_op(result_type, result_id, args[0], args[1], "ldexp"); + break; - // Cast to signed for the subtraction so result can be -1. - auto signed_basetype = to_signed_basetype(width); - SPIRType signed_type = out_type; - signed_type.basetype = signed_basetype; - string clz_expr = join("as_", type_to_glsl(signed_type), "(clz(", input_expr, "))"); + case GLSLstd450Cross: + emit_binary_func_op(result_type, result_id, args[0], args[1], "cross"); + break; - string expr = join(width - 1, " - ", clz_expr); + case GLSLstd450FSign: + emit_unary_func_op(result_type, result_id, args[0], "sign"); + break; - // findMSB returns int (signed). Cast if output type differs. 
- if (out_type.basetype != signed_basetype) - { - expr = join(bitcast_glsl_op(out_type, signed_type), "(", expr, ")"); - } + case GLSLstd450FAbs: + emit_unary_func_op(result_type, result_id, args[0], "fabs"); + break; - emit_op(result_type, result_id, expr, should_forward(args[0])); - inherit_expression_dependencies(result_id, args[0]); + case GLSLstd450FMin: + emit_binary_func_op(result_type, result_id, args[0], args[1], "fmin"); + break; + case GLSLstd450FMax: + emit_binary_func_op(result_type, result_id, args[0], args[1], "fmax"); + break; + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + break; + case GLSLstd450SMin: + emit_binary_func_op(result_type, result_id, args[0], args[1], "min"); + break; + case GLSLstd450SMax: + emit_binary_func_op(result_type, result_id, args[0], args[1], "max"); + break; + case GLSLstd450UMin: + emit_binary_func_op(result_type, result_id, args[0], args[1], "min"); + break; + case GLSLstd450UMax: + emit_binary_func_op(result_type, result_id, args[0], args[1], "max"); + break; + case GLSLstd450SClamp: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + break; + case GLSLstd450UClamp: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450FMix: + case GLSLstd450IMix: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "mix"); + break; + case GLSLstd450Step: + emit_binary_func_op(result_type, result_id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "smoothstep"); + break; + case GLSLstd450Fma: + emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "fma"); break; - } default: CompilerGLSL::emit_glsl_op(result_type, result_id, op, args, count); @@ -1410,6 +2504,21 @@ std::string CompilerOpenCL::to_atomic_ptr_expression(uint32_t id) // Task #3: In OpenCL C, 
pointer-to-struct member access uses -> instead of . // ptr_chain_is_resolved == false means this is the first member access from the base. +bool CompilerOpenCL::should_dereference(uint32_t id) +{ + // In OpenCL C, function parameters with StorageClassFunction pointer types + // are emitted as actual pointers (T*), so they need dereferencing for + // member/component access (e.g., (*a).x instead of a.x). + const auto &type = expression_type(id); + if (is_pointer(type) && type.storage == StorageClassFunction) + { + auto *var = maybe_get(id); + if (var && var->parameter != nullptr) + return true; + } + return CompilerGLSL::should_dereference(id); +} + std::string CompilerOpenCL::to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) { @@ -1439,7 +2548,19 @@ std::string CompilerOpenCL::to_member_reference(uint32_t base, const SPIRType &t { return join("->", to_member_name(type, index)); } - // StorageClassUniform (UBO): emitted by value in OpenCL — use '.' + // StorageClassUniform with BufferBlock decoration is a legacy SSBO (GLSL 430 style), + // emitted as __global T* in OpenCL C — use ->. + // Plain Uniform with Block decoration is a UBO, emitted by value — use '.'. + if (sc == StorageClassUniform) + { + auto *var = maybe_get_backing_variable(base); + if (var) + { + auto &var_type = get(var->basetype); + if (has_decoration(var_type.self, DecorationBufferBlock)) + return join("->", to_member_name(type, index)); + } + } } } return join(".", to_member_name(type, index)); @@ -1800,6 +2921,67 @@ void CompilerOpenCL::emit_function(SPIRFunction &func, const Bitset &return_flag } } +void CompilerOpenCL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto &type = expression_type(rhs_expression); + auto *lhs_e = maybe_get(lhs_expression); + + // In OpenCL C, we cannot assign to a function return value (rvalue). 
+ // The base class wraps the LHS in convert_row_major_matrix() which produces + // spvTranspose(lhs) = rhs, which is invalid C. + // Instead, transpose the RHS and store directly to the LHS. + if (is_matrix(type) && lhs_e && lhs_e->need_transpose) + { + lhs_e->need_transpose = false; + + auto *rhs_e = maybe_get(rhs_expression); + if (rhs_e && rhs_e->need_transpose) + { + // Both sides need transpose — they cancel out. + rhs_e->need_transpose = false; + statement(to_expression(lhs_expression), " = ", to_unpacked_row_major_matrix_expression(rhs_expression), + ";"); + rhs_e->need_transpose = true; + } + else + { + // Transpose the RHS before storing. + auto &rhs_type = expression_type(rhs_expression); + auto rhs_short = opencl_matrix_short_name(rhs_type.basetype, rhs_type.vecsize, rhs_type.columns); + MatrixTypeKey rhs_key = { rhs_type.basetype, rhs_type.vecsize, rhs_type.columns }; + need_transpose.insert(rhs_key); + used_matrix_types.insert(rhs_key); + // The LHS is in physical (transposed) layout, so we transpose the logical RHS to physical. + statement(to_expression(lhs_expression), " = spvTranspose", rhs_short, "(", + to_unpacked_expression(rhs_expression), ");"); + } + + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else if (lhs_e && lhs_e->need_transpose) + { + // Storing a column to a row-major matrix. Unroll the write. 
+ lhs_e->need_transpose = false; + for (uint32_t c = 0; c < type.vecsize; c++) + { + auto lhs_expr = to_dereferenced_expression(lhs_expression); + auto column_index = lhs_expr.find_last_of('['); + if (column_index != string::npos) + { + statement(lhs_expr.insert(column_index, join('[', c, ']')), " = ", + to_extract_component_expression(rhs_expression, c), ";"); + } + } + lhs_e->need_transpose = true; + register_write(lhs_expression); + } + else + { + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + } +} + void CompilerOpenCL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { @@ -1811,6 +2993,30 @@ void CompilerOpenCL::emit_struct_member(const SPIRType &type, uint32_t member_ty { statement(qualifier, "ulong ", to_member_name(type, index), ";"); } + else if (has_member_decoration(type.self, index, DecorationRowMajor)) + { + // Row-major matrix: the physical layout has transposed dimensions. + // Emit the member with the physical (transposed) type so struct layout matches buffer. + // Walk through array nesting to find the inner matrix type. + const auto *inner = &membertype; + while (is_array(*inner)) + inner = &get(inner->parent_type); + + if (inner->columns > 1) + { + auto phys_type_name = opencl_matrix_type_name(inner->basetype, inner->columns, inner->vecsize); + MatrixTypeKey phys_key = { inner->basetype, inner->columns, inner->vecsize }; + used_matrix_types.insert(phys_key); + + statement(qualifier, phys_type_name, " ", to_member_name(type, index), type_to_array_glsl(membertype, 0), + ";"); + } + else + { + // Not actually a matrix member, fall through to default. 
+ statement(qualifier, variable_decl(membertype, to_member_name(type, index)), ";"); + } + } else { statement(qualifier, variable_decl(membertype, to_member_name(type, index)), ";"); @@ -2188,24 +3394,240 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } - // OpOuterProduct: no OpenCL builtin and no native matrix type. - // The result matrix type is represented as its column vector type in OpenCL C. - // Emit only the first column (col_vec * row_vec.x). + // OpOuterProduct: use struct-wrapped matrix helper. case OpOuterProduct: { uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t col_vec = ops[2]; // column vector uint32_t row_vec = ops[3]; // row vector + auto &res_type = get(result_type); + auto &col_type = expression_type(col_vec); auto &row_type = expression_type(row_vec); - // First column of the outer product: col_vec * row_vec.x - string first_row_elem = - row_type.vecsize > 1 ? join(to_expression(row_vec), ".", index_to_swizzle(0)) : to_expression(row_vec); - string expr = join(to_expression(col_vec), " * ", first_row_elem); - emit_op(result_type, result_id, expr, should_forward(col_vec) && should_forward(row_vec)); - inherit_expression_dependencies(result_id, col_vec); - inherit_expression_dependencies(result_id, row_vec); + need_outer_product.insert(make_matrix_key(res_type)); + // Ensure the result matrix type is registered. + used_matrix_types.insert(make_matrix_key(res_type)); + + auto col_short = opencl_vector_short_name(col_type.basetype, col_type.vecsize); + auto row_short = opencl_vector_short_name(row_type.basetype, row_type.vecsize); + string func_name = "spvOuterProduct" + col_short + row_short; + + emit_binary_func_op(result_type, result_id, col_vec, row_vec, func_name.c_str()); + break; + } + + // Matrix arithmetic operations using struct-wrapped matrix helpers. 
+ case OpMatrixTimesVector: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t mat_id = ops[2]; + uint32_t vec_id = ops[3]; + auto &mat_type = expression_type(mat_id); + + auto *e = maybe_get(mat_id); + if (e && e->need_transpose) + { + // Transposed M * v = v * M_untransposed. + // mat_type is the SPIR-V type (e.g., mat2x3 = 2 cols, vecsize=3). + // The untransposed (physical) matrix is mat3x2 = 3 cols, vecsize=2. + e->need_transpose = false; + uint32_t phys_cols = mat_type.vecsize; + uint32_t phys_rows = mat_type.columns; + MatrixTypeKey phys_key = { mat_type.basetype, phys_rows, phys_cols }; + need_mul_vec_mat.insert(phys_key); + used_matrix_types.insert(phys_key); + + auto vec_short = opencl_vector_short_name(mat_type.basetype, phys_rows); + auto mat_short = opencl_matrix_short_name(mat_type.basetype, phys_rows, phys_cols); + string func_name = "spvMul" + vec_short + mat_short; + + string expr = + join(func_name, "(", to_expression(vec_id), ", ", to_unpacked_row_major_matrix_expression(mat_id), ")"); + bool forward = should_forward(mat_id) && should_forward(vec_id); + emit_op(result_type, result_id, expr, forward); + e->need_transpose = true; + } + else + { + auto key = make_matrix_key(mat_type); + need_mul_mat_vec.insert(key); + + auto mat_short = opencl_matrix_short_name(mat_type.basetype, mat_type.vecsize, mat_type.columns); + auto vec_short = opencl_vector_short_name(mat_type.basetype, mat_type.columns); + string func_name = "spvMul" + mat_short + vec_short; + + emit_binary_func_op(result_type, result_id, mat_id, vec_id, func_name.c_str()); + } + inherit_expression_dependencies(result_id, mat_id); + inherit_expression_dependencies(result_id, vec_id); + break; + } + + case OpVectorTimesMatrix: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t vec_id = ops[2]; + uint32_t mat_id = ops[3]; + auto &mat_type = expression_type(mat_id); + + auto *e = maybe_get(mat_id); + if (e && e->need_transpose) + { + // v 
* M^T = M_untransposed * v. + // mat_type is the SPIR-V type (e.g., mat2x3 = 2 cols, vecsize=3). + // The untransposed (physical) matrix is mat3x2 = 3 cols, vecsize=2. + e->need_transpose = false; + uint32_t phys_cols = mat_type.vecsize; + uint32_t phys_rows = mat_type.columns; + MatrixTypeKey phys_key = { mat_type.basetype, phys_rows, phys_cols }; + need_mul_mat_vec.insert(phys_key); + used_matrix_types.insert(phys_key); + + auto mat_short = opencl_matrix_short_name(mat_type.basetype, phys_rows, phys_cols); + auto vec_short = opencl_vector_short_name(mat_type.basetype, phys_rows); + string func_name = "spvMul" + mat_short + vec_short; + + string expr = + join(func_name, "(", to_unpacked_row_major_matrix_expression(mat_id), ", ", to_expression(vec_id), ")"); + bool forward = should_forward(mat_id) && should_forward(vec_id); + emit_op(result_type, result_id, expr, forward); + e->need_transpose = true; + } + else + { + auto key = make_matrix_key(mat_type); + need_mul_vec_mat.insert(key); + + auto vec_short = opencl_vector_short_name(mat_type.basetype, mat_type.vecsize); + auto mat_short = opencl_matrix_short_name(mat_type.basetype, mat_type.vecsize, mat_type.columns); + string func_name = "spvMul" + vec_short + mat_short; + + emit_binary_func_op(result_type, result_id, vec_id, mat_id, func_name.c_str()); + } + inherit_expression_dependencies(result_id, vec_id); + inherit_expression_dependencies(result_id, mat_id); + break; + } + + case OpMatrixTimesMatrix: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t a_id = ops[2]; + uint32_t b_id = ops[3]; + auto &a_type = expression_type(a_id); + auto &b_type = expression_type(b_id); + + auto *ea = maybe_get(a_id); + auto *eb = maybe_get(b_id); + + if (ea && eb && ea->need_transpose && eb->need_transpose) + { + // (A^T * B^T) = (B * A)^T + // Physical (untransposed) matrices have swapped dimensions. 
+ ea->need_transpose = false; + eb->need_transpose = false; + + MatrixTypeKey phys_b = { b_type.basetype, b_type.columns, b_type.vecsize }; + MatrixTypeKey phys_a = { a_type.basetype, a_type.columns, a_type.vecsize }; + need_mul_mat_mat.insert({ phys_b, phys_a }); + need_mul_mat_vec.insert(phys_b); + used_matrix_types.insert(phys_b); + used_matrix_types.insert(phys_a); + + auto mat_b_short = opencl_matrix_short_name(phys_b.basetype, phys_b.vecsize, phys_b.columns); + auto mat_a_short = opencl_matrix_short_name(phys_a.basetype, phys_a.vecsize, phys_a.columns); + string func_name = "spvMul" + mat_b_short + mat_a_short; + + string expr = join(func_name, "(", to_unpacked_row_major_matrix_expression(b_id), ", ", + to_unpacked_row_major_matrix_expression(a_id), ")"); + bool forward = should_forward(a_id) && should_forward(b_id); + emit_transposed_op(result_type, result_id, expr, forward); + + ea->need_transpose = true; + eb->need_transpose = true; + } + else + { + auto key_a = make_matrix_key(a_type); + auto key_b = make_matrix_key(b_type); + need_mul_mat_mat.insert({ key_a, key_b }); + // Also need the MatVec helper for the inner multiplication. + need_mul_mat_vec.insert(key_a); + + auto mat_a_short = opencl_matrix_short_name(a_type.basetype, a_type.vecsize, a_type.columns); + auto mat_b_short = opencl_matrix_short_name(b_type.basetype, b_type.vecsize, b_type.columns); + string func_name = "spvMul" + mat_a_short + mat_b_short; + + emit_binary_func_op(result_type, result_id, a_id, b_id, func_name.c_str()); + } + inherit_expression_dependencies(result_id, a_id); + inherit_expression_dependencies(result_id, b_id); + break; + } + + case OpMatrixTimesScalar: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t mat_id = ops[2]; + uint32_t scalar_id = ops[3]; + auto &mat_type = expression_type(mat_id); + + auto *e = maybe_get(mat_id); + if (e && e->need_transpose) + { + // Physical (untransposed) matrix has swapped dimensions. 
+ e->need_transpose = false; + MatrixTypeKey phys_key = { mat_type.basetype, mat_type.columns, mat_type.vecsize }; + need_mul_mat_scalar.insert(phys_key); + used_matrix_types.insert(phys_key); + + auto mat_short = opencl_matrix_short_name(phys_key.basetype, phys_key.vecsize, phys_key.columns); + string func_name = "spvMul" + mat_short + "Scalar"; + + string expr = join(func_name, "(", to_unpacked_row_major_matrix_expression(mat_id), ", ", + to_expression(scalar_id), ")"); + bool forward = should_forward(mat_id) && should_forward(scalar_id); + emit_transposed_op(result_type, result_id, expr, forward); + e->need_transpose = true; + } + else + { + auto key = make_matrix_key(mat_type); + need_mul_mat_scalar.insert(key); + + auto mat_short = opencl_matrix_short_name(mat_type.basetype, mat_type.vecsize, mat_type.columns); + string func_name = "spvMul" + mat_short + "Scalar"; + + emit_binary_func_op(result_type, result_id, mat_id, scalar_id, func_name.c_str()); + } + inherit_expression_dependencies(result_id, mat_id); + inherit_expression_dependencies(result_id, scalar_id); + break; + } + + case OpTranspose: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t input_id = ops[2]; + auto &in_type = expression_type(input_id); + auto &res_type = get(result_type); + + auto key = make_matrix_key(in_type); + need_transpose.insert(key); + // Ensure both input and output matrix types are registered. + used_matrix_types.insert(key); + used_matrix_types.insert(make_matrix_key(res_type)); + + auto in_short = opencl_matrix_short_name(in_type.basetype, in_type.vecsize, in_type.columns); + string func_name = "spvTranspose" + in_short; + + emit_unary_func_op(result_type, result_id, input_id, func_name.c_str()); break; } @@ -2753,6 +4175,15 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) e.access_chain = true; if (is_subscript_deref) subscripted_deref_exprs.insert(result_id); + + // Propagate row-major transpose flag for matrix members. 
+ if (struct_type && length >= 4) + { + uint32_t mbr_idx = get(ops[3]).scalar(); + if (member_is_non_native_row_major_matrix(*struct_type, mbr_idx)) + e.need_transpose = true; + } + forwarded_temporaries.insert(result_id); suppressed_usage_tracking.insert(result_id); for (uint32_t i = 2; i < length; i++) @@ -2864,6 +4295,41 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + auto &type = get(result_type); + if (type.columns > 1) + { + // Matrix composite construct: emit compound literal (spvMat4){ { col0, col1, ... } } + const auto *elems = &ops[2]; + uint32_t length = instruction.length - 2; + + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); + + auto mat_name = opencl_matrix_type_name(type); + string expr = "(" + mat_name + "){ { "; + for (uint32_t i = 0; i < length; i++) + { + if (i > 0) + expr += ", "; + expr += to_unpacked_expression(elems[i]); + } + expr += " } }"; + emit_op(result_type, result_id, expr, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(result_id, elems[i]); + } + else + { + CompilerGLSL::emit_instruction(instruction); + } + break; + } + case OpCompositeConstructReplicateEXT: { // GLSL base uses type(value) for vector splat, but OpenCL C needs (type)(value). @@ -2872,9 +4338,17 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) auto &type = get(result_type); if (type.op == OpTypeMatrix) { - // OpenCL C has no native matrix type; matrices are represented as their column vector type. - // Just use the sub-value directly (representing the first/only column). - emit_op(result_type, result_id, to_expression(ops[2]), should_forward(ops[2])); + // Struct-wrapped matrix: replicate the column value across all columns. 
+ auto mat_name = opencl_matrix_type_name(type); + string expr = "(" + mat_name + "){ { "; + for (uint32_t i = 0; i < type.columns; i++) + { + if (i > 0) + expr += ", "; + expr += to_expression(ops[2]); + } + expr += " } }"; + emit_op(result_type, result_id, expr, should_forward(ops[2])); inherit_expression_dependencies(result_id, ops[2]); } else if (type.op != OpTypeArray && type.vecsize > 1) diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index a9991fa90..82e551be9 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -25,6 +25,7 @@ #define SPIRV_CROSS_OPENCL_HPP #include "spirv_glsl.hpp" +#include #include #include #include @@ -42,14 +43,20 @@ class CompilerOpenCL : public CompilerGLSL { // OpenCL C version: 120 = 1.2, 200 = 2.0 uint32_t opencl_version = make_opencl_version(1, 2); + // Enable cl_khr_fp16 (half) extension + bool enable_fp16 = false; // Enable cl_khr_fp64 (double) extension bool enable_fp64 = false; // Enable cl_khr_int64_extended_atomics extension bool enable_64bit_atomics = false; // Enable cl_khr_subgroups extension bool enable_subgroups = false; - // Enable cl_khr_subgroup_shuffle extension - bool enable_shuffle = false; + // Enable all subgroup extensions + bool enable_subgroups_all = false; + // Emulate missing subgroup extensions + bool emulate_subgroups = false; + // Size of subgroup emulation + uint32_t fixed_subgroup_size = 0; void set_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) { @@ -104,6 +111,7 @@ class CompilerOpenCL : public CompilerGLSL std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0) override; void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_instruction(const Instruction &instruction) override; + bool should_dereference(uint32_t id) override; std::string to_member_reference(uint32_t base, const SPIRType &type, uint32_t index, bool ptr_chain_is_resolved) override; std::string to_func_call_arg(const 
SPIRFunction::Parameter &arg, uint32_t id) override; @@ -124,10 +132,14 @@ class CompilerOpenCL : public CompilerGLSL std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); const char *to_restrict(uint32_t id, bool space); uint32_t get_physical_type_id_stride(TypeID type_id) const override; + bool member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) override; + std::string convert_row_major_matrix(std::string exp_str, const SPIRType &exp_type, uint32_t physical_type_id, + bool is_packed, bool relaxed) override; void replace_illegal_names() override; void emit_function(SPIRFunction &func, const Bitset &return_flags) override; void emit_block_hints(const SPIRBlock &block) override; + void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; @@ -151,6 +163,74 @@ class CompilerOpenCL : public CompilerGLSL bool needs_bitreverse_polyfill = false; // Set when a default sampler is needed for combined image+sampler usage. bool needs_default_sampler = false; + // Set when findLSB polyfill is needed. + bool needs_findlsb_polyfill = false; + // Set when pack/unpack Snorm/Unorm polyfills are needed. + bool needs_pack_snorm_4x8 = false; + bool needs_pack_unorm_4x8 = false; + bool needs_pack_snorm_2x16 = false; + bool needs_pack_unorm_2x16 = false; + bool needs_unpack_snorm_4x8 = false; + bool needs_unpack_unorm_4x8 = false; + bool needs_unpack_snorm_2x16 = false; + bool needs_unpack_unorm_2x16 = false; + // Set when determinant/inverse polyfills are needed (per size). 
+ bool needs_determinant_2 = false; + bool needs_determinant_3 = false; + bool needs_determinant_4 = false; + bool needs_inverse_2 = false; + bool needs_inverse_3 = false; + bool needs_inverse_4 = false; + + // Matrix type support: tracks which matrix signatures (basetype, vecsize, columns) are needed. + struct MatrixTypeKey + { + SPIRType::BaseType basetype; + uint32_t vecsize; + uint32_t columns; + bool operator<(const MatrixTypeKey &o) const + { + if (basetype != o.basetype) + return basetype < o.basetype; + if (columns != o.columns) + return columns < o.columns; + return vecsize < o.vecsize; + } + bool operator==(const MatrixTypeKey &o) const + { + return basetype == o.basetype && vecsize == o.vecsize && columns == o.columns; + } + bool operator!=(const MatrixTypeKey &o) const + { + return !(*this == o); + } + }; + std::set used_matrix_types; + + // Flags for which matrix helper functions need to be emitted. + std::set need_mul_mat_vec; // MatrixTimesVector + std::set need_mul_vec_mat; // VectorTimesMatrix + std::set> need_mul_mat_mat; // MatrixTimesMatrix + std::set need_mul_mat_scalar; // MatrixTimesScalar + std::set need_transpose; // OpTranspose (key is input matrix type) + std::set need_outer_product; // OpOuterProduct (key is result matrix type) + + std::string opencl_matrix_type_name(const SPIRType &type); + std::string opencl_matrix_type_name(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + std::string opencl_column_type_name(SPIRType::BaseType basetype, uint32_t vecsize); + // Short names for building helper function names (e.g. "Mat4", "Vec4", "DVec4"). 
+ std::string opencl_matrix_short_name(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + std::string opencl_vector_short_name(SPIRType::BaseType basetype, uint32_t vecsize); + void emit_matrix_typedefs(); + void emit_matrix_helpers(); + void emit_mul_mat_vec_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + void emit_mul_vec_mat_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + void emit_mul_mat_mat_helper(const MatrixTypeKey &a, const MatrixTypeKey &b); + void emit_mul_mat_scalar_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + void emit_transpose_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + void emit_outer_product_helper(SPIRType::BaseType basetype, uint32_t vecsize, uint32_t columns); + MatrixTypeKey make_matrix_key(const SPIRType &type); + void prepass_discover_matrix_types(); // For each non-entry function, the ordered list of flattened buffer var IDs to thread as extra params. std::unordered_map> func_flattened_args; diff --git a/test_shaders.py b/test_shaders.py index dbc38ba5c..9343d9a9d 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -613,12 +613,22 @@ def path_to_opencl_standard_cli(shader): def validate_shader_opencl(shader, opt, paths): shader = reference_path(shader[0], shader[1], opt) extensions = [] - if '.double.' in shader: + if '.fp16.' in shader: + extensions.append('cl_khr_fp16') + if '.fp64.' in shader: extensions.append('cl_khr_fp64') - if '.subgroup.' in shader: + if '.subgroups-emulate.' in shader: + if '.subgroups.' in shader: + extensions.append('cl_khr_subgroups') + elif '.subgroups.' in shader: extensions.append('cl_khr_subgroups') - if '.shuffle.' 
in shader: + extensions.append('cl_khr_subgroup_ballot') + extensions.append('cl_khr_subgroup_clustered_reduce') + extensions.append('cl_khr_subgroup_non_uniform_arithmetic') + extensions.append('cl_khr_subgroup_non_uniform_vote') + extensions.append('cl_khr_subgroup_rotate') extensions.append('cl_khr_subgroup_shuffle') + extensions.append('cl_khr_subgroup_shuffle_relative') global ignore_clang try: @@ -681,12 +691,16 @@ def cross_compile_opencl(shader, spirv, opt, iterations, paths): opencl_args = [spirv_cross_path, '--output', opencl_path, spirv_path, '--opencl', '--iterations', str(iterations)] opencl_args.append('--opencl-version') opencl_args.append(path_to_opencl_standard_cli(shader)) - if '.double.' in shader: + if '.fp16.' in shader: + opencl_args.append('--opencl-fp16') + if '.fp64.' in shader: opencl_args.append('--opencl-fp64') - if '.subgroup.' in shader: - opencl_args.append('--opencl-subgroups') - if '.shuffle.' in shader: - opencl_args.append('--opencl-shuffle') + if '.subgroups.' in shader: + opencl_args.append('--opencl-subgroups-all') + if '.subgroups-emulate.' 
in shader: + opencl_args.append('--opencl-emulate-subgroups') + opencl_args.append('--opencl-fixed-subgroup-size') + opencl_args.append('32') if shader_is_invalid_spirv(shader): subprocess.run(opencl_args) From cfc761b9a5a4ddcc948dcd8212e0e463d801a72c Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Sat, 14 Mar 2026 11:47:54 -0700 Subject: [PATCH 07/16] OpenCL: add more tests --- ...ased-struct-divergent-member-name.asm.comp | 32 +++ .../comp/arithmetic-conversion-signs.asm.comp | 43 ++++ ...-physical-layout-mismatch.invalid.asm.comp | 20 ++ .../asm/comp/atomic-load-store.asm.comp | 20 ++ .../asm/comp/atomic-min-max-sign.asm.comp | 25 ++ .../asm/comp/atomic-result-temporary.asm.comp | 21 ++ .../asm/comp/bda-arguments.asm.comp | 41 +++ ...-to-array-in-buffer.invalid.asm.spv16.comp | 24 ++ .../bitcast-fp16-fp32.fp16.invalid.asm.comp | 22 ++ .../comp/bitfield-signed-operations.asm.comp | 38 +++ .../asm/comp/bitscan.asm.comp | 35 +++ ...block-like-array-type-construct-2.asm.comp | 41 +++ ...like-array-type-construct.invalid.asm.comp | 37 +++ ...buffer-device-address-ptr-casting.asm.comp | 36 +++ ...e-construct-buffer-struct.asm.invalid.comp | 25 ++ .../comp/constant-composite-undef.asm.comp | 18 ++ .../comp/constant-lut-name-aliasing.asm.comp | 20 ++ .../asm/comp/copy-logical-2.spv14.asm.comp | 83 +++++++ ...fset-and-array-stride-diffs.spv14.asm.comp | 61 +++++ .../asm/comp/copy-logical.spv14.asm.comp | 56 +++++ ...vice-array-load-temporary.asm.invalid.comp | 29 +++ ...porary.force-native-array.asm.invalid.comp | 29 +++ ...constant-array-load-store.asm.invalid.comp | 33 +++ ...-store.force-native-array.asm.invalid.comp | 33 +++ ...-in-entry-point.noeliminate.spv14.asm.comp | 23 ++ .../asm/comp/glsl-signed-operations.asm.comp | 50 ++++ .../glsl.std450.frexp-modf-struct.asm.comp | 40 +++ ...nner-array-of-struct-copy.invalid.asm.comp | 38 +++ ...mage-atomic-mismatch-sign.asm.invalid.comp | 0 .../asm/comp/local-size-id-override.asm.comp | 34 +++ 
.../asm/comp/local-size-id.asm.invalid.comp | 35 +++ .../asm/comp/modf-storage-class.asm.comp | 49 ++++ .../opptrdiff-basic.spv14.invalid.asm.comp | 56 +++++ ...pptraccesschain-elem-offset.spv14.asm.comp | 50 ++++ .../asm/comp/opptrequal-basic.spv14.asm.comp | 34 +++ ...tx-bypass-transpose.spv14.asm.invalid.comp | 52 ++++ .../comp/opptrnotequal-basic.spv14.asm.comp | 34 +++ ...-access-chain-custom-array-stride.asm.comp | 21 ++ .../comp/spec-constant-name-aliasing.asm.comp | 48 ++++ .../storage-buffer-basic.invalid.asm.comp | 27 ++ .../storage-buffer-pointer-argument.asm.comp | 28 +++ ...entals-float-controls-2-fp16.fp16.asm.comp | 35 +++ ...entals-float-controls-2-fp32.fp16.asm.comp | 35 +++ .../asm/comp/variable-pointers-2.asm.comp | 16 ++ .../comp/variable-pointers-3.invalid.asm.comp | 12 + ...ariable-pointers-vector-to-scalar.asm.comp | 12 + .../comp/variable-pointers.asm.invalid.comp | 77 ++++++ .../variable-ssbo-argument.spv16.asm.comp | 21 ++ ...ssbo-array-argument.spv16.invalid.asm.comp | 21 ++ ...ar-alias-ptr-access-chain.asm.invalid.comp | 145 +++++++++++ ...tier-1.device-argument-buffer.invalid.comp | 0 ...array-copy-threadgroup-memory.invalid.comp | 20 ++ .../atomic-cmpxchg-packed-vector.invalid.comp | 31 +++ .../comp/basic.invalid.comp | 0 .../comp/bda-atomics.invalid.comp | 44 ++++ ...a-load-std140-arrayed-pointer.invalid.comp | 27 ++ .../bda-nonwritable-glslang-workaround.comp | 26 ++ ...bda-restrict-pointer-variable.invalid.comp | 26 ++ .../comp/bitcast-16bit-1.invalid.comp | 0 .../comp/bitcast-16bit-2.invalid.comp | 0 .../shaders-opencl-no-opt/comp/bitfield.comp | 35 +++ ...ce-address-from-pointer-complex-chain.comp | 33 +++ ...extract-atomics-from-function.invalid.comp | 90 +++++++ ...vocation-id-writable-ssbo-in-function.comp | 27 ++ .../comp/glsl.std450.comp | 234 ++++++++++++++++++ .../comp/illegal-struct-name.asm.comp | 27 ++ ...plicit-integer-promotion.fp16.invalid.comp | 89 +++++++ .../comp/int16min-literal.fp16.invalid.comp | 27 ++ 
.../comp/int64.invalid.comp | 75 ++++++ .../comp/int64min-literal.comp | 26 ++ .../comp/integer-dot-product.comp | 58 +++++ .../comp/intmin-literal.comp | 24 ++ .../shaders-opencl-no-opt/comp/loop.comp | 100 ++++++++ .../read-only-coherent-image.invalid.comp | 17 ++ .../shaders-opencl-no-opt/comp/return.comp | 39 +++ ...tier-1.device-argument-buffer.invalid.comp | 0 ...std140-array-load-composite-construct.comp | 18 ++ ...ct-packing-scalar.nocompat.invalid.vk.comp | 144 +++++++++++ ....vk.opencl12.emulate-subgroup.invalid.comp | 0 ...at.vk.subgroup.fixed-subgroup.invalid.comp | 0 ...ubgroups.nocompat.vk.subgroup.invalid.comp | 0 ....nocompat.vk.subgroup.swizzle.invalid.comp | 0 ...ncendental-float-controls-1-fp16.fp16.comp | 35 +++ ...ncendental-float-controls-1-fp32.fp16.comp | 35 +++ .../transposed-temporary-expression-2.comp | 56 +++++ .../comp/transposed-temporary-expression.comp | 41 +++ .../comp/trivial-select-cast-vector.comp | 19 ++ .../comp/trivial-select-matrix.spv14.comp | 22 ++ ...roup-size-spec-constant-array.invalid.comp | 58 +++++ ...ize-spec-constant-array.spv16.invalid.comp | 70 ++++++ ...ased-struct-divergent-member-name.asm.comp | 77 ++++++ .../comp/arithmetic-conversion-signs.asm.comp | 131 ++++++++++ ...-physical-layout-mismatch.invalid.asm.comp | 47 ++++ .../asm/comp/atomic-load-store.asm.comp | 48 ++++ .../asm/comp/atomic-min-max-sign.asm.comp | 56 +++++ .../asm/comp/atomic-result-temporary.asm.comp | 59 +++++ .../asm/comp/bda-arguments.asm.comp | 81 ++++++ ...-to-array-in-buffer.invalid.asm.spv16.comp | 71 ++++++ .../bitcast-fp16-fp32.fp16.invalid.asm.comp | 63 +++++ .../comp/bitfield-signed-operations.asm.comp | 97 ++++++++ .../asm/comp/bitscan.asm.comp | 72 ++++++ ...block-like-array-type-construct-2.asm.comp | 85 +++++++ ...like-array-type-construct.invalid.asm.comp | 80 ++++++ ...buffer-device-address-ptr-casting.asm.comp | 106 ++++++++ ...e-construct-buffer-struct.asm.invalid.comp | 54 ++++ .../comp/constant-composite-undef.asm.comp | 
40 +++ .../comp/constant-lut-name-aliasing.asm.comp | 81 ++++++ .../asm/comp/copy-logical-2.spv14.asm.comp | 81 ++++++ ...fset-and-array-stride-diffs.spv14.asm.comp | 60 +++++ .../asm/comp/copy-logical.spv14.asm.comp | 69 ++++++ ...vice-array-load-temporary.asm.invalid.comp | 53 ++++ ...porary.force-native-array.asm.invalid.comp | 53 ++++ ...constant-array-load-store.asm.invalid.comp | 81 ++++++ ...-store.force-native-array.asm.invalid.comp | 81 ++++++ ...-in-entry-point.noeliminate.spv14.asm.comp | 59 +++++ .../asm/comp/glsl-signed-operations.asm.comp | 123 +++++++++ .../glsl.std450.frexp-modf-struct.asm.comp | 55 ++++ ...nner-array-of-struct-copy.invalid.asm.comp | 137 ++++++++++ ...mage-atomic-mismatch-sign.asm.invalid.comp | 71 ++++++ .../asm/comp/local-size-id-override.asm.comp | 60 +++++ .../asm/comp/local-size-id.asm.invalid.comp | 76 ++++++ .../asm/comp/modf-storage-class.asm.comp | 116 +++++++++ .../opptrdiff-basic.spv14.invalid.asm.comp | 98 ++++++++ ...pptraccesschain-elem-offset.spv14.asm.comp | 79 ++++++ .../asm/comp/opptrequal-basic.spv14.asm.comp | 96 +++++++ ...tx-bypass-transpose.spv14.asm.invalid.comp | 98 ++++++++ .../comp/opptrnotequal-basic.spv14.asm.comp | 96 +++++++ ...-access-chain-custom-array-stride.asm.comp | 98 ++++++++ .../comp/spec-constant-name-aliasing.asm.comp | 78 ++++++ .../storage-buffer-basic.invalid.asm.comp | 58 +++++ .../storage-buffer-pointer-argument.asm.comp | 63 +++++ ...entals-float-controls-2-fp16.fp16.asm.comp | 225 +++++++++++++++++ ...entals-float-controls-2-fp32.fp16.asm.comp | 224 +++++++++++++++++ .../asm/comp/variable-pointers-2.asm.comp | 71 ++++++ .../comp/variable-pointers-3.invalid.asm.comp | 60 +++++ ...ariable-pointers-vector-to-scalar.asm.comp | 60 +++++ .../comp/variable-pointers.asm.invalid.comp | 152 ++++++++++++ .../variable-ssbo-argument.spv16.asm.comp | 44 ++++ ...ssbo-array-argument.spv16.invalid.asm.comp | 45 ++++ ...ar-alias-ptr-access-chain.asm.invalid.comp | 214 ++++++++++++++++ 
...tier-1.device-argument-buffer.invalid.comp | 9 + ...array-copy-threadgroup-memory.invalid.comp | 18 ++ .../atomic-cmpxchg-packed-vector.invalid.comp | 17 ++ shaders-opencl-no-opt/comp/basic.invalid.comp | 27 ++ .../comp/bda-atomics.invalid.comp | 34 +++ ...a-load-std140-arrayed-pointer.invalid.comp | 20 ++ .../bda-nonwritable-glslang-workaround.comp | 22 ++ ...bda-restrict-pointer-variable.invalid.comp | 18 ++ .../comp/bitcast-16bit-1.invalid.comp | 23 ++ .../comp/bitcast-16bit-2.invalid.comp | 26 ++ shaders-opencl-no-opt/comp/bitfield.comp | 23 ++ ...ce-address-from-pointer-complex-chain.comp | 21 ++ ...extract-atomics-from-function.invalid.comp | 69 ++++++ ...vocation-id-writable-ssbo-in-function.comp | 12 + shaders-opencl-no-opt/comp/glsl.std450.comp | 129 ++++++++++ .../comp/illegal-struct-name.asm.comp | 62 +++++ ...plicit-integer-promotion.fp16.invalid.comp | 85 +++++++ .../comp/int16min-literal.fp16.invalid.comp | 22 ++ shaders-opencl-no-opt/comp/int64.invalid.comp | 65 +++++ .../comp/int64min-literal.comp | 21 ++ .../comp/integer-dot-product.comp | 114 +++++++++ .../comp/intmin-literal.comp | 18 ++ shaders-opencl-no-opt/comp/loop.comp | 98 ++++++++ .../read-only-coherent-image.invalid.comp | 17 ++ shaders-opencl-no-opt/comp/return.comp | 33 +++ ...tier-1.device-argument-buffer.invalid.comp | 13 + ...std140-array-load-composite-construct.comp | 13 + ...ct-packing-scalar.nocompat.invalid.vk.comp | 100 ++++++++ ....vk.opencl12.emulate-subgroup.invalid.comp | 25 ++ ...at.vk.subgroup.fixed-subgroup.invalid.comp | 211 ++++++++++++++++ ...ubgroups.nocompat.vk.subgroup.invalid.comp | 211 ++++++++++++++++ ....nocompat.vk.subgroup.swizzle.invalid.comp | 211 ++++++++++++++++ ...ncendental-float-controls-1-fp16.fp16.comp | 35 +++ ...ncendental-float-controls-1-fp32.fp16.comp | 35 +++ .../transposed-temporary-expression-2.comp | 24 ++ .../comp/transposed-temporary-expression.comp | 17 ++ .../comp/trivial-select-cast-vector.comp | 14 ++ 
.../comp/trivial-select-matrix.spv14.comp | 16 ++ ...roup-size-spec-constant-array.invalid.comp | 21 ++ ...ize-spec-constant-array.spv16.invalid.comp | 21 ++ spirv_opencl.cpp | 111 +++++++-- spirv_opencl.hpp | 5 + 182 files changed, 9737 insertions(+), 19 deletions(-) create mode 100644 reference/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp create mode 100644 
reference/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp create mode 100644 
reference/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp create mode 100644 reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/basic.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp create mode 100644 
reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bitfield.comp create mode 100644 reference/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp create mode 100644 reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp create mode 100644 reference/shaders-opencl-no-opt/comp/glsl.std450.comp create mode 100644 reference/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp create mode 100644 reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/int64.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/int64min-literal.comp create mode 100644 reference/shaders-opencl-no-opt/comp/integer-dot-product.comp create mode 100644 reference/shaders-opencl-no-opt/comp/intmin-literal.comp create mode 100644 reference/shaders-opencl-no-opt/comp/loop.comp create mode 100644 reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/return.comp create mode 100644 reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp create mode 100644 
reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp create mode 100644 reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp create mode 100644 reference/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp create mode 100644 reference/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp create mode 100644 reference/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp create mode 100644 reference/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp create mode 100644 reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp create mode 100644 
shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp create mode 100644 shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/bitscan.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp create mode 100644 
shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp create mode 100644 shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp create mode 100644 
shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/basic.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bda-atomics.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp create mode 100644 shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/bitfield.comp create mode 100644 shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp create mode 100644 shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp create mode 100644 shaders-opencl-no-opt/comp/glsl.std450.comp create mode 100644 shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp create mode 100644 shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/int64.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/int64min-literal.comp create mode 100644 shaders-opencl-no-opt/comp/integer-dot-product.comp create mode 100644 shaders-opencl-no-opt/comp/intmin-literal.comp create mode 100644 shaders-opencl-no-opt/comp/loop.comp create mode 100644 
shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/return.comp create mode 100644 shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp create mode 100644 shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp create mode 100644 shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp create mode 100644 shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp create mode 100644 shaders-opencl-no-opt/comp/transposed-temporary-expression.comp create mode 100644 shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp create mode 100644 shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp create mode 100644 shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp diff --git a/reference/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 000000000..583813d01 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,32 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct T +{ + float c; +}; + +typedef 
struct T T; + +struct SSBO1 +{ + T foo[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO2 +{ + T bar[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global T* _9, __global T* _13) +{ + T v = (T){ 40.0f }; + _9[10].c = v.c; + _13[30].c = v.c; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp new file mode 100644 index 000000000..19a82a8fb --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp @@ -0,0 +1,43 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int s32; + uint u32; + short s16; + ushort u16; + float f32; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _6) +{ + int _29 = _6->s32; + uint _30 = _6->u32; + short _31 = _6->s16; + ushort _32 = _6->u16; + float _33 = _6->f32; + _6->s32 = convert_int(_31); + _6->u32 = convert_uint(_31); + _6->s32 = convert_int(_32); + _6->u32 = convert_uint(_32); + _6->u32 = convert_uint(_31); + _6->u32 = convert_uint(_32); + _6->s16 = convert_short(_29); + _6->u16 = convert_ushort(_29); + _6->s16 = convert_short(_30); + _6->u16 = convert_ushort(_30); + _6->u16 = convert_ushort(_29); + _6->u16 = convert_ushort(_30); + _6->f32 = convert_float(_31); + _6->f32 = convert_float(_32); + _6->f32 = convert_float(_31); + _6->f32 = convert_float(_32); + _6->s16 = convert_short(_33); + _6->u16 = convert_ushort(_33); + _6->u16 = convert_ushort(_33); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp new file mode 100644 index 000000000..45aaa65c5 --- /dev/null +++ 
b/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp @@ -0,0 +1,20 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float b[5]; + float c[5]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _7) +{ + float a[5] = _7->b; + a = _7->b; + _7->b = a; + _7->c = a; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 000000000..64dd5c4dc --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,20 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint a; + uint b; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _7) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _16 = atomic_add(&(_7->b), 0u); + uint c = _16; + atomic_xchg(&(_7->a), c); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 000000000..51a153da0 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint a; + int b; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _6) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _30 = atomic_max(&(_6->a), 1u); + uint _31 = atomic_min(&(_6->a), 1u); + uint _32 = atomic_min(&(_6->a), 4294967295u); + uint _33 = atomic_max(&(_6->a), 4294967295u); + int _34 = atomic_max(&(_6->b), -3); + int _35 = 
atomic_min(&(_6->b), -3); + int _36 = atomic_min(&(_6->b), 4); + int _37 = atomic_max(&(_6->b), 4); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 000000000..e68c8925b --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,21 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint count; + uint data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _7) +{ + uint _19 = atomic_add(&(_7->count), 1u); + if (_19 < 1024u) + { + _7->data[_19] = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + } +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp new file mode 100644 index 000000000..e927b1917 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _4; + +struct _4 +{ + int _m0; + ulong _m1; +}; + +typedef struct _4 _4; + +struct _16 +{ + ulong _m0; +}; + +typedef struct _16 _16; + +void _43(__global _4* __restrict _10, int _44, __global int* __restrict _12, __global int* __restrict __global * __restrict _13, __global int* __restrict _14) +{ +} + +void _40(__global _4* _6, int _41, __global int* _7, __global int* __global * _8, __global int* _9) +{ + _43(_6, _41, _7, _8, _9); + _6->_m0 = _41; + *_7 = _41; + *_9 = _41; + *_8 = _9; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(_16 _32) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __global _4* _28 = ((__global _4*)(_32._m0)); + _40(_28, 40, &_28->_m0, &_28->_m1, ((__global int*)(_28->_m1))); +} + diff --git 
a/reference/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp b/reference/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp new file mode 100644 index 000000000..803d9421e --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _7 +{ + ulong _m0; + ulong _m1; +}; + +typedef struct _7 _7; + +__global uint* _23(__global _7* _2) +{ + __global uint* _29 = ((__global uint*)((ulong)(((__global uchar*)(_2->_m1))) + 16ul)); + *_29 = 1u; + return _29; +} + +__attribute__((reqd_work_group_size(16, 16, 1))) +__kernel void comp_main(__global _7* _2) +{ + __global uint* _31 = _23(_2); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp new file mode 100644 index 000000000..b02f295d9 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp @@ -0,0 +1,22 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO +{ + half2 a; + float b; + float c; + half2 d; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _6) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _6->b = uintBitsToFloat(packFloat2x16(_6->a)); + _6->d = unpackFloat2x16(floatBitsToUint(_6->c)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 000000000..3b5bd0001 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,38 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL 
backend) + + +struct SSBO +{ + int4 ints; + uint4 uints; +}; + +typedef struct SSBO SSBO; + +uint spvBitReverse(uint v) { + v = ((v >> 1u) & 0x55555555u) | ((v & 0x55555555u) << 1u); + v = ((v >> 2u) & 0x33333333u) | ((v & 0x33333333u) << 2u); + v = ((v >> 4u) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4u); + v = ((v >> 8u) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8u); + return (v >> 16u) | (v << 16u); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _4) +{ + int4 _19 = _4->ints; + uint4 _20 = _4->uints; + _4->ints = popcount(_19); + _4->uints = as_uint4(popcount(_19)); + _4->ints = as_int4(popcount(_20)); + _4->uints = popcount(_20); + _4->ints = as_int4((uint4)(spvBitReverse(as_uint4(_19).s0), spvBitReverse(as_uint4(_19).s1), spvBitReverse(as_uint4(_19).s2), spvBitReverse(as_uint4(_19).s3))); + _4->uints = (uint4)(spvBitReverse(_20.s0), spvBitReverse(_20.s1), spvBitReverse(_20.s2), spvBitReverse(_20.s3)); + _4->ints = (_19 << (32 - 11u - 1)) >> (32 - 11u); + _4->uints = as_uint4((as_int4(_20) << (32 - 1 - 11u)) >> (32 - 1)); + _4->ints = as_int4((as_uint4(_19) >> 1) & ((uint4)(1u << 11u) - (uint4)1u)); + _4->uints = (_20 >> 11u) & ((uint4)(1u << 1) - (uint4)1u); + _4->ints = as_int4((as_uint4(_19) & ~(((uint4)(1u << 11u) - (uint4)1u) << 1)) | ((as_uint4(_19.wzyx) << 1) & (((uint4)(1u << 11u) - (uint4)1u) << 1))); + _4->uints = (_20 & ~(((uint4)(1u << 1) - (uint4)1u) << 11u)) | ((_20.wzyx << 11u) & (((uint4)(1u << 1) - (uint4)1u) << 11u)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 000000000..f538fab1a --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint4 u; + int4 i; +}; + +typedef struct SSBO SSBO; + +static int spvFindLSB(uint x) { + if (x == 0u) return -1; + return 31 - 
as_int(clz(x & (0u - x))); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _6) +{ + uint4 _19 = _6->u; + int4 _20 = _6->i; + _6->u = (uint4)(spvFindLSB(_19.x), spvFindLSB(_19.y), spvFindLSB(_19.z), spvFindLSB(_19.w)); + _6->i = (int4)(spvFindLSB(_19.x), spvFindLSB(_19.y), spvFindLSB(_19.z), spvFindLSB(_19.w)); + _6->u = (uint4)(spvFindLSB(as_uint(_20.x)), spvFindLSB(as_uint(_20.y)), spvFindLSB(as_uint(_20.z)), spvFindLSB(as_uint(_20.w))); + _6->i = (int4)(spvFindLSB(as_uint(_20.x)), spvFindLSB(as_uint(_20.y)), spvFindLSB(as_uint(_20.z)), spvFindLSB(as_uint(_20.w))); + _6->u = as_uint4(31 - as_int4(clz(_19))); + _6->i = 31 - as_int4(clz(_19)); + _6->u = as_uint4(31 - as_int4(clz(as_uint4(_20)))); + _6->i = 31 - as_int4(clz(as_uint4(_20))); + _6->u = as_uint4(31 - clz(as_int4(_19) ^ (as_int4(_19) >> 31))); + _6->i = 31 - clz(as_int4(_19) ^ (as_int4(_19) >> 31)); + _6->u = as_uint4(31 - clz(_20 ^ (_20 >> 31))); + _6->i = 31 - clz(_20 ^ (_20 >> 31)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 000000000..4ba95f9d4 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct type_CommonConstants +{ + uint g_count; + uint3 g_padding4; +}; + +typedef struct type_CommonConstants type_CommonConstants; + +struct MyStruct +{ + float4 m_coefficients[4]; +}; + +typedef struct MyStruct MyStruct; + +struct type_RWStructuredBuffer_MyStruct +{ + MyStruct _m0[1]; +}; + +typedef struct type_RWStructuredBuffer_MyStruct type_RWStructuredBuffer_MyStruct; + +constant float4 _27[4] = { (float4)(0.0f), (float4)(0.0f), (float4)(0.0f), (float4)(0.0f) }; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(type_CommonConstants 
CommonConstants, __global MyStruct* g_data) +{ + do + { + if (((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x >= CommonConstants.g_count) + { + break; + } + g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = (MyStruct){ { (float4)(0.0f), (float4)(0.0f), (float4)(0.0f), (float4)(0.0f) } }; + break; + } while(false); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp new file mode 100644 index 000000000..421377b4d --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _12 +{ + float _m0[4]; + float _m1[4]; +}; + +typedef struct _12 _12; + +constant float _36[4] = { 1.0f, 2.0f, 3.0f, 4.0f }; +constant _12 _39[2] = { (_12){ { 1.0f, 2.0f, 3.0f, 4.0f }, { 1.0f, 2.0f, 3.0f, 4.0f } }, (_12){ { 1.0f, 2.0f, 3.0f, 4.0f }, { 1.0f, 2.0f, 3.0f, 4.0f } } }; + +struct SSBO +{ + uint a; + int b; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _8) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float foo[4]; + float foo2[4]; + foo[0] = 1.0f; + foo = { 1.0f, 2.0f, 3.0f, 4.0f }; + foo[1] = 2.0f; + foo[2] = 3.0f; + foo[3] = 4.0f; + foo2 = foo; + _12 _41 = (_12){ { foo[0], foo[1], foo[2], foo[3] }, { foo2[0], foo2[1], foo2[2], foo2[3] } }; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp new file mode 100644 index 000000000..bd7015044 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross 
(OpenCL backend) + + +struct SomeBuffer; + +struct SomeBuffer +{ + float4 v; + ulong a; + uint2 b; +}; + +typedef struct SomeBuffer SomeBuffer; + +struct Registers +{ + ulong address; + uint2 address2; +}; + +typedef struct Registers Registers; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(Registers registers) +{ + __global SomeBuffer* _44 = ((__global SomeBuffer*)(registers.address)); + __global SomeBuffer* _45 = ((__global SomeBuffer*)(registers.address)); + __global SomeBuffer* _46 = ((__global SomeBuffer*)as_ulong(registers.address2)); + _44->v = (float4)(1.0f, 2.0f, 3.0f, 4.0f); + _45->v = (float4)(1.0f, 2.0f, 3.0f, 4.0f); + _46->v = (float4)(1.0f, 2.0f, 3.0f, 4.0f); + _44->a = (ulong)(_44); + _45->a = (ulong)((ulong)(_45)); + _46->b = as_uint2((ulong)(_46)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp new file mode 100644 index 000000000..c9bf180b8 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp @@ -0,0 +1,25 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +typedef struct Block Block; + +struct SSBO +{ + Block _m0[3]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Block* ssbo) +{ + __local uint2 _18[2]; + ssbo[0u] = (Block){ { ssbo[0u]._m1[0], ssbo[0u]._m1[1] }, { ssbo[0u]._m1[0], ssbo[0u]._m1[1] } }; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp new file mode 100644 index 000000000..b59d2814c --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross 
(OpenCL backend) + + +struct Block +{ + float4 f; +}; + +typedef struct Block Block; + +constant float _15 = 0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* block) +{ + block[0] = (float4)(0.100000001490116119384765625f, 0.20000000298023223876953125f, 0.300000011920928955078125f, 0.0f); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 000000000..6e0851769 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,20 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int values[1]; +}; + +typedef struct SSBO SSBO; + +constant int indexable[4] = { 0, 1, 2, 3 }; +constant int indexable_1[4] = { 4, 5, 6, 7 }; + +__attribute__((reqd_work_group_size(4, 4, 1))) +__kernel void comp_main(__global int* _8) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _8[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = indexable[((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x] + indexable_1[((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).y]; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 000000000..e237c895e --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,83 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; + +struct _13 +{ + spvMat2 _m0; +}; + +typedef struct _13 _13; + +struct _14 +{ + spvMat2 _m0; +}; + +typedef struct _14 _14; + +struct B2 +{ + float4 elem2; +}; + +typedef struct B2 B2; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; + _14 _m3; +}; + 
+typedef struct C C; + +struct B1 +{ + float4 elem1; +}; + +typedef struct B1 B1; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; + _13 _m3; +}; + +typedef struct A A; + +struct _10 +{ + A a_block; + C c_block; +}; + +typedef struct _10 _10; + +static spvMat2 spvTransposeMat2(spvMat2 m) +{ + spvMat2 r; + r.columns[0] = (float2)(m.columns[0].x, m.columns[1].x); + r.columns[1] = (float2)(m.columns[0].y, m.columns[1].y); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _10* _4) +{ + A _24; + _24.a = _4->c_block.c; + _24.b1.elem1 = _4->c_block.b2.elem2; + _24.b1_array[0].elem1 = _4->c_block.b2_array[0].elem2; + _24.b1_array[1].elem1 = _4->c_block.b2_array[1].elem2; + _24.b1_array[2].elem1 = _4->c_block.b2_array[2].elem2; + _24.b1_array[3].elem1 = _4->c_block.b2_array[3].elem2; + _24._m3._m0 = spvTransposeMat2(_4->c_block._m3._m0); + _4->a_block = _24; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 000000000..c411b48d0 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,61 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[4]; } spvMat4; + +struct _9 +{ + uint _m0; +}; + +typedef struct _9 _9; + +struct _10 +{ + uint _m0; +}; + +typedef struct _10 _10; + +struct _5 +{ + uint _m0; + uint _m1[2]; + uint _m2; + _9 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +typedef struct _5 _5; + +struct _6 +{ + uint _m0; + uint _m1[2]; + uint _m2; + _10 _m3; + float4 _m4; + float3 _m5; + float2 _m6; +}; + +typedef struct _6 _6; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _6* _3, __global _5* _4) +{ + _6 _22 = (*_3); + _5 _23; + _23._m0 = _22._m0; + _23._m1[0] = 
_22._m1[0]; + _23._m1[1] = _22._m1[1]; + _23._m2 = _22._m2; + _23._m3._m0 = _22._m3._m0; + _23._m4 = _22._m4; + _23._m5 = _22._m5; + _23._m6 = _22._m6; + *_4 = _23; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 000000000..069e04c31 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,56 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct B2 +{ + float4 elem2; +}; + +typedef struct B2 B2; + +struct C +{ + float4 c; + B2 b2; + B2 b2_array[4]; +}; + +typedef struct C C; + +struct B1 +{ + float4 elem1; +}; + +typedef struct B1 B1; + +struct A +{ + float4 a; + B1 b1; + B1 b1_array[4]; +}; + +typedef struct A A; + +struct _10 +{ + A a_block; + C c_block; +}; + +typedef struct _10 _10; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _10* _4) +{ + A _24; + _24.a = _4->c_block.c; + _24.b1.elem1 = _4->c_block.b2.elem2; + _24.b1_array[0].elem1 = _4->c_block.b2_array[0].elem2; + _24.b1_array[1].elem1 = _4->c_block.b2_array[1].elem2; + _24.b1_array[2].elem1 = _4->c_block.b2_array[2].elem2; + _24.b1_array[3].elem1 = _4->c_block.b2_array[3].elem2; + _4->a_block = _24; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp new file mode 100644 index 000000000..e4387c0c9 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp @@ -0,0 +1,29 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +typedef struct Block Block; + +struct SSBO +{ + Block _m0[3]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Block* ssbo) +{ + __local 
uint2 _18[2]; + uint2 _27[2]; + _27[0] = ssbo[0u]._m1[0]; + _27[1] = ssbo[0u]._m1[1]; + ssbo[0u]._m0 = _27; + ssbo[0u]._m0 = _27; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp new file mode 100644 index 000000000..e4387c0c9 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp @@ -0,0 +1,29 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +typedef struct Block Block; + +struct SSBO +{ + Block _m0[3]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Block* ssbo) +{ + __local uint2 _18[2]; + uint2 _27[2]; + _27[0] = ssbo[0u]._m1[0]; + _27[1] = ssbo[0u]._m1[1]; + ssbo[0u]._m0 = _27; + ssbo[0u]._m0 = _27; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp new file mode 100644 index 000000000..f8a5f221b --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +typedef struct Block Block; + +struct SSBO +{ + Block _m0[3]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Block* ssbo, SSBO ubo) +{ + __local uint2 _18[2]; + ssbo[0u]._m0 = ssbo[0u]._m1; + ssbo[0u]._m0 = ubo._m0[0u]._m1; + uint2 _23[2]; + ssbo[0u]._m0 = _23; + ssbo[0u]._m0 = _18; + _18 = ssbo[0u]._m1; + _23 = ssbo[0u]._m1; + _18 = ubo._m0[0u]._m1; + _23 = ubo._m0[0u]._m1; +} + diff --git 
a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp new file mode 100644 index 000000000..f8a5f221b --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Block +{ + uint2 _m0[2]; + uint2 _m1[2]; +}; + +typedef struct Block Block; + +struct SSBO +{ + Block _m0[3]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global Block* ssbo, SSBO ubo) +{ + __local uint2 _18[2]; + ssbo[0u]._m0 = ssbo[0u]._m1; + ssbo[0u]._m0 = ubo._m0[0u]._m1; + uint2 _23[2]; + ssbo[0u]._m0 = _23; + ssbo[0u]._m0 = _18; + _18 = ssbo[0u]._m1; + _23 = ssbo[0u]._m1; + _18 = ubo._m0[0u]._m1; + _23 = ubo._m0[0u]._m1; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 000000000..0d6820a4f --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct UBO +{ + float v; +}; + +typedef struct UBO UBO; + +struct SSBO +{ + float v; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global float* ssbo) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp new file mode 100644 index 000000000..e59e39dae --- 
/dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp @@ -0,0 +1,50 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int4 ints; + uint4 uints; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _6) +{ + int4 _19 = _6->ints; + uint4 _20 = _6->uints; + _6->ints = as_int4(abs(_19)); + _6->uints = abs(_19); + _6->ints = as_int4(abs(as_int4(_20))); + _6->uints = abs(as_int4(_20)); + _6->ints = clamp(_19, -1, 1); + _6->uints = as_uint4(clamp(_19, -1, 1)); + _6->ints = clamp(as_int4(_20), -1, 1); + _6->uints = as_uint4(clamp(as_int4(_20), -1, 1)); + _6->ints = 31 - clz(as_int4(_20) ^ (as_int4(_20) >> 31)); + _6->uints = as_uint4(31 - clz(as_int4(_20) ^ (as_int4(_20) >> 31))); + _6->ints = 31 - as_int4(clz(as_uint4(_19))); + _6->uints = as_uint4(31 - as_int4(clz(as_uint4(_19)))); + _6->ints = min(_19, _19); + _6->uints = as_uint4(min(_19, as_int4(_20))); + _6->ints = min(as_int4(_20), as_int4(_20)); + _6->uints = as_uint4(min(as_int4(_20), _19)); + _6->ints = as_int4(min(as_uint4(_19), _20)); + _6->uints = min(as_uint4(_19), _20); + _6->ints = as_int4(min(_20, as_uint4(_19))); + _6->uints = min(_20, as_uint4(_19)); + _6->ints = max(_19, _19); + _6->uints = as_uint4(max(_19, _19)); + _6->ints = max(as_int4(_20), _19); + _6->uints = as_uint4(max(as_int4(_20), _19)); + _6->ints = as_int4(max(as_uint4(_19), _20)); + _6->uints = max(as_uint4(_19), as_uint4(_19)); + _6->ints = as_int4(max(_20, as_uint4(_19))); + _6->uints = max(_20, as_uint4(_19)); + _6->ints = clamp(as_int4(_20), as_int4(_20), as_int4(_20)); + _6->uints = as_uint4(clamp(as_int4(_20), as_int4(_20), as_int4(_20))); + _6->ints = as_int4(clamp(as_uint4(_19), as_uint4(_19), as_uint4(_19))); + _6->uints = clamp(as_uint4(_19), as_uint4(_19), as_uint4(_19)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp 
b/reference/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 000000000..f44d85023 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,40 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _9 +{ + float _m0; + float _m1; +}; + +typedef struct _9 _9; + +struct _16 +{ + float _m0; + int _m1; +}; + +typedef struct _16 _16; + +struct _4 +{ + float _m0; + int _m1; +}; + +typedef struct _4 _4; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _4* _6) +{ + _9 _23; + _23._m0 = modf(20.0f, &_23._m1); + _16 _24; + _24._m0 = frexp(40.0f, &_24._m1); + _6->_m0 = _23._m0; + _6->_m0 = _23._m1; + _6->_m0 = _24._m0; + _6->_m1 = _24._m1; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp new file mode 100644 index 000000000..cedb4d5d6 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp @@ -0,0 +1,38 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Data +{ + float3 sourceData[16]; +}; + +typedef struct Data Data; + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(read_only image2d_t g_inputTexture, write_only image2d_t g_output) +{ + __local Data g_data[64]; + uint _49; + _49 = 0u; + for (; _49 < 4u; _49++) + { + for (uint _56 = 0u; _56 < 4u; ) + { + int3 _65 = as_int3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))) + (int3)(as_int(_56), as_int(_49), 0); + g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[(_49 * 4u) + _56] = texelFetch(g_inputTexture, _65.xy, _65.z).xyz; + _56++; + continue; + } + } + float3 _45[16] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData; 
+ uint _77; + _77 = 0u; + for (int _80 = 0; _80 < 16; ) + { + _77 |= convert_uint(clamp(dot(_45[_80], (float3)(-1.0f)), 0.0f, 1.0f)); + _80++; + continue; + } + write_imageui(g_output, as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy), (uint4)(_77)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 000000000..22b5b4066 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 values[1]; +}; + +typedef struct SSBO SSBO; + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11u +#endif +constant uint _12 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12u +#endif +constant uint _13 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13u +#endif +constant uint _6 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14u +#endif +constant uint _7 = SPIRV_CROSS_CONSTANT_ID_4; +constant uint3 spvWorkgroupSize = (uint3)(3u, _12, _13); + +__attribute__((reqd_work_group_size(3, 11, 12))) +__kernel void comp_main(__global float4* _10) +{ + _10[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] += (float4)(2.0f); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp new file mode 100644 index 000000000..8e6e38e20 --- /dev/null +++ 
b/reference/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float4 values[1]; +}; + +typedef struct SSBO SSBO; + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 11 +#endif +constant int _12 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 12 +#endif +constant int _13 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 13 +#endif +constant int _6 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 14 +#endif +constant int _7 = SPIRV_CROSS_CONSTANT_ID_4; +#define _37 ((as_uint(_6) + 3u)) +constant uint3 _38 = (uint3)(_37, _7, 2u); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _10) +{ + _10[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = ((((_10[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] + (float4)(2.0f)) + convert_float3(_38).xyzz) * convert_float(_6)) * convert_float(_7)) * convert_float(2u); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 000000000..7522b61b7 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,49 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _19 +{ + float2 _m0; + float2 _m1; +}; + +typedef struct _19 _19; + +struct _6 +{ + uint2 _m0[324]; +}; + +typedef struct _6 _6; + +struct _9 +{ + float2 _m0[648]; +}; + +typedef struct _9 _9; + +struct _13 +{ + float2 _m0[648]; +}; + +typedef struct _13 _13; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const uint2* _7, __global float2* _11, __global float2* _14) +{ + for (uint _46 = 0u; _46 < 648u; _46 
+= 2u) + { + uint2 _47 = _7[_46 / 2u]; + float2 _48 = as_float2(_47); + float2 _69 = modf(_48, &_11[_46]); + _11[_46 + 1u] = _69; + _19 _74; + _74._m0 = modf(_48, &_74._m1); + _19 _50 = _74; + _14[_46] = _50._m1; + _14[_46 + 1u] = _50._m0; + } +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp new file mode 100644 index 000000000..32195c0b5 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp @@ -0,0 +1,56 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _7 +{ + int _m0[1][4]; +}; + +typedef struct _7 _7; + +struct _9 +{ + int _m0[1][17]; +}; + +typedef struct _9 _9; + +struct _11 +{ + int _m0; +}; + +typedef struct _11 _11; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global int* _2, __global int* _3, _11 _4) +{ + if (as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x >= _4._m0) + { + return; + } + int _49; + if (as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).x == 1) + { + _3[as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x][16] = &_2[as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x] - &_2[0]; + _49 = 0; + } + else + { + _49 = 0; + } + for (;;) + { + int _50 = _49 + 1; + _3[as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x][(as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).x * 4) + _49] = &_2[as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x][as_int3(((uint3)(get_local_id(0), get_local_id(1), get_local_id(2)))).x] - &_2[as_int3(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2)))).x][_49]; + if (_50 == 4) + { + break; + } + else + { + _49 = _50; + } + } +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp 
b/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 000000000..eb979ab04 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,50 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _7 +{ + int _m0; + int _m1[1]; +}; + +typedef struct _7 _7; + +struct _9 +{ + int2 _m0[1]; +}; + +typedef struct _9 _9; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _7* _2, __global int2* _3) +{ + __global int* _4; + __global int* _5; + int _28 = _2->_m0; + _4 = &_2->_m1[0]; + _5 = &_2->_m1[0 + _28]; + int _34; + if (!(_28 <= 0)) + { + _34 = 0; + for (;;) + { + __global int* _36 = _4; + __global int* _37 = _5; + int _35 = _34 + 1; + _4 = &_36[1]; + _5 = &_37[-1]; + _3[_34] = (int2)(_36 - _37, _37 - _36); + if (_34 >= _28) + { + break; + } + else + { + _34 = _35; + } + } + } +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 000000000..3f60f0746 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _7 +{ + uint _m0[1]; +}; + +typedef struct _7 _7; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _2, __global uint* _3, __global uint* _4, __global uint* _5) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5[_18] = (uint)(&_2 == &_3); + uint _32 = _28 + 1u; + _5[_28] = (uint)(&_2[0] == &_3[0]); + uint _36 = _32 + 1u; + _5[_32] = (uint)(&_2[0u] == &_3[0u]); + uint _40 = _36 + 1u; + _5[_36] = (uint)(&_2 == &_4); + uint _44 = _40 + 1u; + _5[_40] = (uint)(&_2[0] == &_4[0]); + uint _48 = _44 + 1u; + _5[_44] = (uint)(&_2[0u] == &_4[0u]); + uint _52 = _48 + 1u; + _5[_48] = (uint)(&_3 == &_4); + 
uint _56 = _52 + 1u; + _5[_52] = (uint)(&_3[0] == &_4[0]); + _5[_56] = (uint)(&_3[0u] == &_4[0u]); + _5[_56 + 1u] = (uint)(&_2 == &_2); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp new file mode 100644 index 000000000..0f41e332f --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp @@ -0,0 +1,52 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[4]; } spvMat4; + +struct _6 +{ + spvMat4 _m0; + spvMat4 _m1; + float _m2; + float _m3; +}; + +typedef struct _6 _6; + +struct _7 +{ + uint _m0[1]; +}; + +typedef struct _7 _7; + +static spvMat4 spvTransposeMat4(spvMat4 m) +{ + spvMat4 r; + r.columns[0] = (float4)(m.columns[0].x, m.columns[1].x, m.columns[2].x, m.columns[3].x); + r.columns[1] = (float4)(m.columns[0].y, m.columns[1].y, m.columns[2].y, m.columns[3].y); + r.columns[2] = (float4)(m.columns[0].z, m.columns[1].z, m.columns[2].z, m.columns[3].z); + r.columns[3] = (float4)(m.columns[0].w, m.columns[1].w, m.columns[2].w, m.columns[3].w); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global _6* _2, __global _6* _3, __global uint* _4) +{ + uint _26 = 0u; + uint _39 = _26 + 1u; + _4[_26] = (&_2->_m2 == &_2->_m3) ? 0u : 1u; + bool _40 = &_2->_m2 == &_3->_m2; + uint _43 = _39 + 1u; + _4[_39] = _40 ? 0u : 1u; + bool _46 = _40 ? &_2->_m2 : &_2->_m3 == _40 ? &_3->_m2 : &_3->_m3; + uint _49 = _43 + 1u; + _4[_43] = _46 ? 0u : 1u; + uint _54 = _49 + 1u; + _4[_49] = (_46 ? &_2->_m2 : &_2->_m3 == &_2->_m0.columns[0u].x) ? 0u : 1u; + uint _56 = (&_2->_m0 == &spvTransposeMat4(_2->_m1)) ? 
0u : 1u; + uint _58 = _54 + 1u; + _4[_54] = _56; + _4[_58] = _56; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 000000000..2d46e33bc --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,34 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _7 +{ + uint _m0[1]; +}; + +typedef struct _7 _7; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _2, __global uint* _3, __global uint* _4, __global uint* _5) +{ + uint _18 = 0u; + uint _28 = _18 + 1u; + _5[_18] = (uint)(&_2 != &_3); + uint _32 = _28 + 1u; + _5[_28] = (uint)(&_2[0] != &_3[0]); + uint _36 = _32 + 1u; + _5[_32] = (uint)(&_2[0u] != &_3[0u]); + uint _40 = _36 + 1u; + _5[_36] = (uint)(&_2 != &_4); + uint _44 = _40 + 1u; + _5[_40] = (uint)(&_2[0] != &_4[0]); + uint _48 = _44 + 1u; + _5[_44] = (uint)(&_2[0u] != &_4[0u]); + uint _52 = _48 + 1u; + _5[_48] = (uint)(&_3 != &_4); + uint _56 = _52 + 1u; + _5[_52] = (uint)(&_3[0] != &_4[0]); + _5[_56] = (uint)(&_3[0u] != &_4[0u]); + _5[_56 + 1u] = (uint)(&_2 != &_2); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp new file mode 100644 index 000000000..9d37c04c1 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp @@ -0,0 +1,21 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Registers +{ + ulong a; + ulong b; + uint2 c; + uint2 d; +}; + +typedef struct Registers Registers; + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(Registers _7) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + *((__global 
float3*)((ulong)(((__global float3*)(_7.a))) + ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x * 12)) = (*((__global float3*)((ulong)(((__global float3*)(_7.a))) + ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x * 12))) + ((__global float3*)(_7.b))[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; + *((__global float3*)((ulong)(((__global float3*)as_ulong(_7.c))) + ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x * 12)) = (*((__global float3*)((ulong)(((__global float3*)as_ulong(_7.c))) + ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x * 12))) + ((__global float3*)as_ulong(_7.d))[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 000000000..d07a53e83 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,48 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + int values[1]; +}; + +typedef struct SSBO SSBO; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 0 +#endif +constant int A = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1 +#endif +constant int A_1 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 2 +#endif +constant int A_2 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 3 +#endif +constant int A_3 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_4 +#define SPIRV_CROSS_CONSTANT_ID_4 4 +#endif +constant int A_4 = SPIRV_CROSS_CONSTANT_ID_4; +#ifndef SPIRV_CROSS_CONSTANT_ID_5 +#define SPIRV_CROSS_CONSTANT_ID_5 5 +#endif +constant int A_5 = SPIRV_CROSS_CONSTANT_ID_5; +#define A_6 ((A - A_1)) 
+#define A_7 ((A_6 - A_2)) +#define A_8 ((A_7 - A_3)) +#define A_9 ((A_8 - A_4)) +#define A_10 ((A_9 - A_5)) +#define A_11 ((A_10 + A_5)) + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global int* _7) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _7[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = A_11; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp new file mode 100644 index 000000000..952585e08 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + float _m0[1]; +}; + +typedef struct _3 _3; + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _15 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 3u +#endif +constant uint _17 = SPIRV_CROSS_CONSTANT_ID_2; +constant uint3 spvWorkgroupSize = (uint3)(_15, 2u, _17); + +__attribute__((reqd_work_group_size(1, 2, 3))) +__kernel void comp_main(__global float* _20, __global float* _21) +{ + uint3 _19 = spvWorkgroupSize = spvWorkgroupSize; + _20[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x] = _21[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x] + _20[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x]; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp new file mode 100644 index 000000000..9a56784a5 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ 
+ float a; +}; + +typedef struct SSBO SSBO; + +struct SSBORead +{ + float b; +}; + +typedef struct SSBORead SSBORead; + +void copy_out(__global float* A_1, __global float* B_1) +{ + *A_1 = *B_1; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float* _10, __global const float* _14) +{ + copy_out(&_10[0], &_14[0]); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp new file mode 100644 index 000000000..230929e90 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO +{ + float v[4]; + half f16[4]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global SSBO* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += tan(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + 
get_local_id(1) * get_local_size(0) + get_local_id(0)))] += acos(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += asin(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += atan(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp2(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log2(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sqrt(_9->v[((uint)(get_local_id(2) * 
get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += rsqrt(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += pow(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))], 4.0f); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += cosh(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sinh(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp new file mode 100644 index 000000000..230929e90 --- 
/dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO +{ + float v[4]; + half f16[4]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global SSBO* _9) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += tan(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += acos(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += asin(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += 
atan(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp2(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log2(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sqrt(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += rsqrt(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += pow(_9->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * 
get_local_size(0) + get_local_id(0)))], 4.0f); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += cosh(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sinh(_9->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp new file mode 100644 index 000000000..dfbbef692 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp @@ -0,0 +1,16 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float2 test[64]; + float _21 = convert_float(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x); + float2 _22 = (float2)(_21); + ((&((&test)[0u]))[0u])[1u + 2u] = _22; + ((&test)[0u])[1u + 2u] = _22; + ((&test)[0u])[3u] = _22; + ((&test)[0u])[2u + 1u].x = _21; +} + diff --git 
a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp new file mode 100644 index 000000000..99ce6ceff --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp @@ -0,0 +1,12 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float2 test[64]; + float _21 = convert_float(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x); + (true ? &((&test)[0u])[2u].x : &((&test)[0u])[2u].x)[1u] = _21; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp new file mode 100644 index 000000000..5af73eb76 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp @@ -0,0 +1,12 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float2 test[64]; + float _21 = convert_float(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x); + (*(true ? 
&test[1u] : &test[2u])).y = _21; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp new file mode 100644 index 000000000..7b9c6e61d --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp @@ -0,0 +1,77 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct foo +{ + int a[128]; + uint b; + float2 c; +}; + +typedef struct foo foo; + +struct bar +{ + int d; +}; + +typedef struct bar bar; + +struct baz +{ + int e[128]; +}; + +typedef struct baz baz; + +__global int* select_buffer(__global foo* buf, __global int* buf2, bar cb) +{ + return (cb.d != 0) ? &buf->a[0u] : &buf2[0u]; +} + +__global int* select_buffer_null(__global foo* buf, bar cb) +{ + return (cb.d != 0) ? &buf->a[0u] : NULL; +} + +__local int* select_tgsm(bar cb, __local int* tgsm) +{ + return (cb.d != 0) ? &tgsm[0u] : NULL; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global foo* buf, bar cb, __global int* buf2) +{ + __local int tgsm[128]; + __global int* sbuf_1_1; + __global int* sbuf2_1_1; + __local int* stgsm_1_1; + sbuf_1_1 = select_buffer(buf, buf2, cb); + sbuf2_1_1 = select_buffer_null(buf, cb); + stgsm_1_1 = select_tgsm(cb, tgsm); + __local int* cur_1_1 = stgsm_1_1; + __global int* _78; + _78 = &buf->a[0u]; + __local int* _81; + int _82; + for (;;) + { + _81 = cur_1_1; + _82 = *_78; + if (_82 != 0) + { + int _86 = *_81; + int _87 = _82 + _86; + *_78 = _87; + *_81 = _87; + cur_1_1 = &_81[1u]; + _78 = &_78[1u]; + continue; + } + else + { + break; + } + } +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp new file mode 100644 index 000000000..86ba9b715 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp @@ -0,0 +1,21 @@ 
+// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + uchar _m0[1]; +}; + +typedef struct _3 _3; + +void _20(__global uchar* _21) +{ + _21[2u] = (uchar)(0); +} + +__attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void comp_main(__global uchar* _2) +{ + _20(&_2[1u]); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp new file mode 100644 index 000000000..2b20027cd --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp @@ -0,0 +1,21 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct _3 +{ + uchar _m0[16]; +}; + +typedef struct _3 _3; + +void _20(__global uchar* _21[16]) +{ + (*_21)[2u] = (uchar)(0); +} + +__attribute__((reqd_work_group_size(16, 1, 1))) +__kernel void comp_main(__global uchar* _2) +{ + _20(&_2[0]); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp new file mode 100644 index 000000000..c6c583b90 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp @@ -0,0 +1,145 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _15 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 1u +#endif +constant uint _16 = SPIRV_CROSS_CONSTANT_ID_1; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _17 = SPIRV_CROSS_CONSTANT_ID_2; +constant uint3 spvWorkgroupSize = (uint3)(_15, _16, _17); + +struct _6 +{ + uint4 _m0[1]; +}; + +typedef struct _6 _6; + +struct _7 +{ + 
uint _m0; +}; + +typedef struct _7 _7; + +struct _8 +{ + _7 _m0; +}; + +typedef struct _8 _8; + +constant uchar4 _137 = (uchar4)(0); + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint4* _25, _8 _29) +{ + __local uint _5[256]; + __local uchar _10[1024]; + uint3 _20 = spvWorkgroupSize = spvWorkgroupSize; + bool _40 = _29._m0._m0 != 0u; + if (_40) + { + uchar _58 = convert_uchar(((((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).y * ((uint3)(get_local_id(0), get_local_id(1), get_local_id(2))).x) / ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).y) % 255u); + uint _66; + uint _61 = 0u; + uint _62; + for (;;) + { + _62 = _61 * _29._m0._m0; + _66 = 0u; + for (;;) + { + uint _67 = _66 + _62; + uint _68 = _66 * _61; + _5[_67] = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x + _68; + uint _74 = _67 << 2u; + uint _76 = _74 >> 10u; + uint _78 = _74 & 1020u; + uchar4 _80 = as_uchar4(((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).y + _68); + ((&_10)[_76])[_78 | 1u] = _80.y; + ((&_10)[_76])[_78 | 2u] = _80.z; + ((&_10)[_76])[_78 | 3u] = _80.w; + ((&_10)[_76])[_78] = _58; + uint _93 = _66 + 1u; + if (_93 >= _29._m0._m0) + { + break; + } + else + { + _66 = _93; + } + } + uint _100 = _61 + 1u; + if (_100 >= _29._m0._m0) + { + break; + } + else + { + _61 = _100; + continue; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); + uint _112; + if (_40) + { + _112 = 0u; + uint _117; + uint _113; + for (;;) + { + _113 = _112 * _29._m0._m0; + _117 = 0u; + for (;;) + { + uint _118 = _117 + _113; + uint _123 = _118 << 2u; + uint _124 = _123 >> 10u; + uint _125 = _123 & 1020u; + uchar4 _138; + _138.x = ((&_10)[_124])[_125]; + _138.y = ((&_10)[_124])[_125 | 1u]; + _138.z = ((&_10)[_124])[_125 | 2u]; + _138.w = ((&_10)[_124])[_125 | 3u]; + uint _143 = _5[_118] + as_uint(_138); + uint4 _144 = _25[_118]; + _144.x = _143; + _144.y = _143 >> 2u; + _144.w = _143 >> 3u; + _25[_118] = _144; + uint _150 = _117 + 1u; + 
if (_150 >= _29._m0._m0) + { + break; + } + else + { + _117 = _150; + } + } + uint _157 = _112 + 1u; + if (_157 >= _29._m0._m0) + { + break; + } + else + { + _112 = _157; + continue; + } + } + } + barrier(CLK_LOCAL_MEM_FENCE); +} + diff --git a/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp b/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp b/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp new file mode 100644 index 000000000..33bdcbea5 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp @@ -0,0 +1,20 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(8, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float shared_group[8][8]; + __local float shared_group_alt[8][8]; + float blob[8]; + for (int i = 0; i < 8; i++) + { + blob[i] = convert_float(i); + } + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = blob; + barrier(CLK_LOCAL_MEM_FENCE); + float copied_blob[8] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; +} + diff --git 
a/reference/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp b/reference/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp new file mode 100644 index 000000000..e7237a064 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp @@ -0,0 +1,31 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct AttData0 +{ + uint3 att0[1]; +}; + +typedef struct AttData0 AttData0; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint3* _22) +{ + uint newVal_1 = 432u; + uint prevVal_1 = 0u; + uint curVal_1 = 0u; + for (;;) + { + uint _30 = atomic_cmpxchg(&(_22[0][0u]), prevVal_1, newVal_1); + curVal_1 = _30; + if (_30 != prevVal_1) + { + continue; + } + else + { + break; + } + } +} + diff --git a/reference/shaders-opencl-no-opt/comp/basic.invalid.comp b/reference/shaders-opencl-no-opt/comp/basic.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp b/reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp new file mode 100644 index 000000000..5b9a08f9b --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp @@ -0,0 +1,44 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Ptr; + +struct Registers +{ + ulong ptr; +}; + +typedef struct Registers Registers; + +struct Ptr +{ + uint i; + uint2 i2; +}; + +typedef struct Ptr Ptr; + +struct UBO +{ + ulong ptr_ubo; +}; + +typedef struct UBO UBO; + +struct SSBO +{ + ulong ptr_ssbo; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(Registers _12, UBO _26, __global const __global Ptr** _35) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _23 = atomic_add(&((__global Ptr*)(_12.ptr))->i, 10u); + uint _32 = atomic_add(&((__global Ptr*)(_26.ptr_ubo))->i, 11u); + uint _41 = 
atomic_add(&((__global Ptr*)(_35[0]))->i, 12u); + uint _51 = atomic_add(&((__global Ptr*)as_ulong(((__global Ptr*)(_12.ptr))->i2))->i, 13u); +} + diff --git a/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp b/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp new file mode 100644 index 000000000..cb3ef0331 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO; + +struct SSBO +{ + float data[1]; +}; + +typedef struct SSBO SSBO; + +struct UBO +{ + __global SSBO* ptrs[2]; +}; + +typedef struct UBO UBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(UBO _17) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __global SSBO* s0 = ((__global SSBO*)(_17.ptrs[0])); + s0->data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] += 1.0f; +} + diff --git a/reference/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp b/reference/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp new file mode 100644 index 000000000..3decaac79 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO; + +struct Registers +{ + uint2 bda; +}; + +typedef struct Registers Registers; + +struct SSBO +{ + float data[1]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(Registers _10) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + ((__global SSBO*)as_ulong(_10.bda))->data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = 0.0f; +} + diff --git a/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp 
b/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp new file mode 100644 index 000000000..5d82fb4d5 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Ref; + +struct Ref +{ + float4 v; +}; + +typedef struct Ref Ref; + +struct Registers +{ + ulong foo; +}; + +typedef struct Registers Registers; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(Registers _14) +{ + restrict __global Ref* __restrict ref = ((__global Ref*)(_14.foo)); + ref->v = (float4)(1.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/bitfield.comp b/reference/shaders-opencl-no-opt/comp/bitfield.comp new file mode 100644 index 000000000..754ec2495 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bitfield.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +uint spvBitReverse(uint v) { + v = ((v >> 1u) & 0x55555555u) | ((v & 0x55555555u) << 1u); + v = ((v >> 2u) & 0x33333333u) | ((v & 0x33333333u) << 2u); + v = ((v >> 4u) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4u); + v = ((v >> 8u) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8u); + return (v >> 16u) | (v << 16u); +} + +static int spvFindLSB(uint x) { + if (x == 0u) return -1; + return 31 - as_int(clz(x & (0u - x))); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main() +{ + int signed_value_1 = 0; + uint unsigned_value_1 = 0u; + int s_1 = (signed_value_1 << (32 - 20 - 5)) >> (32 - 20); + uint u_1 = 
(unsigned_value_1 >> 6) & ((uint)(1u << 21) - (uint)1u); + s_1 = as_int((as_uint(s_1) & ~(((uint)(1u << 4) - (uint)1u) << 5)) | ((as_uint(40) << 5) & (((uint)(1u << 4) - (uint)1u) << 5))); + u_1 = (u_1 & ~(((uint)(1u << 4) - (uint)1u) << 5)) | ((60u << 5) & (((uint)(1u << 4) - (uint)1u) << 5)); + u_1 = spvBitReverse(u_1); + s_1 = as_int(spvBitReverse(as_uint(s_1))); + int v0_1 = as_int(popcount(u_1)); + int v1_1 = popcount(s_1); + int v2_1 = 31 - as_int(clz(u_1)); + int v3_1 = 31 - clz(s_1 ^ (s_1 >> 31)); + int v4_1 = spvFindLSB(u_1); + int v5_1 = spvFindLSB(as_uint(s_1)); +} + diff --git a/reference/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp b/reference/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp new file mode 100644 index 000000000..350c009c8 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp @@ -0,0 +1,33 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO; + +struct S +{ + float3 v; +}; + +typedef struct S S; + +struct SSBO +{ + S s[1]; +}; + +typedef struct SSBO SSBO; + +struct PC +{ + uint2 ptr; +}; + +typedef struct PC PC; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(PC pc) +{ + __global SSBO* ssbo = ((__global SSBO*)as_ulong(pc.ptr)); + ssbo->s[0].v = (float3)(1.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp b/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp new file mode 100644 index 000000000..016fbcd95 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp @@ -0,0 +1,90 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#define var (*var_ptr) +#define var (*var_ptr) +void testAdd(__local uint* var_ptr) +{ + uint _29 = atomic_add(&var, 1u); +} + +#undef var + +#define var (*var_ptr) +void testMin(__local uint* var_ptr) +{ + 
uint _31 = atomic_min(&var, 2u); +} + +#undef var + +#define var (*var_ptr) +void testMax(__local uint* var_ptr) +{ + uint _33 = atomic_max(&var, 3u); +} + +#undef var + +#define var (*var_ptr) +void testOr(__local uint* var_ptr) +{ + uint _35 = atomic_or(&var, 5u); +} + +#undef var + +#define var (*var_ptr) +void testXor(__local uint* var_ptr) +{ + uint _37 = atomic_xor(&var, 6u); +} + +#undef var + +#define var (*var_ptr) +void testExchange(__local uint* var_ptr) +{ + uint _39 = atomic_xchg(&var, 7u); +} + +#undef var + +#define var (*var_ptr) +void testCompSwap(__local uint* var_ptr) +{ + uint _42 = atomic_cmpxchg(&var, 8u, 9u); +} + +#undef var + +#define var (*var_ptr) +void testStore(__local uint* var_ptr) +{ + atomic_xchg(&var, 10u); +} + +#undef var + +void foo(__local uint* var_ptr) +{ + testAdd(&var); + testMin(&var); + testMax(&var); + testOr(&var); + testXor(&var); + testExchange(&var); + testCompSwap(&var); + testStore(&var); +} + +#undef var + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local uint var; + foo(&var); +} + diff --git a/reference/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp b/reference/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp new file mode 100644 index 000000000..8edae4313 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct myBlock +{ + int a; + float b[1]; +}; + +typedef struct myBlock myBlock; + +#define _RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID (*_RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID_ptr) +float getB(__global myBlock* myStorage, __private uint3* _RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID_ptr) +{ + return myStorage->b[_RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID.x]; 
+} + +#undef _RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global myBlock* myStorage) +{ + uint3 _RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + myStorage->a = (myStorage->a + 1) % 256; + myStorage->b[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = fmod(getB(myStorage, &_RESERVED_IDENTIFIER_FIXUP_gl_GlobalInvocationID) + 0.0199999995529651641845703125f, 1.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/glsl.std450.comp b/reference/shaders-opencl-no-opt/comp/glsl.std450.comp new file mode 100644 index 000000000..9188ba1e6 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,234 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[3]; } spvMat3; +typedef struct { float4 columns[4]; } spvMat4; + +struct SSBO +{ + float res; + int ires; + uint ures; + float4 f32; + int4 s32; + uint4 u32; + spvMat2 m2; + spvMat3 m3; + spvMat4 m4; +}; + +typedef struct SSBO SSBO; + +struct ResType +{ + float _m0; + float _m1; +}; + +typedef struct ResType ResType; + +struct ResType_1 +{ + float _m0; + int _m1; +}; + +typedef struct ResType_1 ResType_1; + +uint spvPackHalf2x16(float2 v) { + uint r; + vstore_half(v.x, 0, (__private half *)&r); + vstore_half(v.y, 1, (__private half *)&r); + return r; +} + +float2 spvUnpackHalf2x16(uint u) { + const __private uint *p = &u; + return (float2)(vload_half(0, (const __private half *)p), + vload_half(1, (const __private half *)p)); +} + +static int spvFindLSB(uint x) { + if (x == 0u) return -1; + return 31 - as_int(clz(x & (0u - x))); +} + +static uint spvPackSnorm4x8(float4 v) { + char4 packed = convert_char4_sat_rte(v * 127.0f); + return as_uint(packed); +} + +static uint spvPackUnorm4x8(float4 v) { + uchar4 packed = convert_uchar4_sat_rte(v * 
255.0f); + return as_uint(packed); +} + +static uint spvPackSnorm2x16(float2 v) { + short2 packed = convert_short2_sat_rte(v * 32767.0f); + return as_uint(packed); +} + +static uint spvPackUnorm2x16(float2 v) { + ushort2 packed = convert_ushort2_sat_rte(v * 65535.0f); + return as_uint(packed); +} + +static float4 spvUnpackSnorm4x8(uint v) { + char4 packed = as_char4(v); + return max(convert_float4(packed) / 127.0f, (float4)(-1.0f)); +} + +static float4 spvUnpackUnorm4x8(uint v) { + uchar4 packed = as_uchar4(v); + return convert_float4(packed) / 255.0f; +} + +static float2 spvUnpackSnorm2x16(uint v) { + short2 packed = as_short2(v); + return max(convert_float2(packed) / 32767.0f, (float2)(-1.0f)); +} + +static float2 spvUnpackUnorm2x16(uint v) { + ushort2 packed = as_ushort2(v); + return convert_float2(packed) / 65535.0f; +} + +static float spvDeterminant2(spvMat2 m) { + return m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x; +} + +static float spvDeterminant3(spvMat3 m) { + return dot(m.columns[0], (float3)(m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x)); +} + +static float spvDeterminant4(spvMat4 m) { + return dot(m.columns[0], (float4)(m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, m.columns[2].x * m.columns[3].y * m.columns[1].w - 
m.columns[3].x * m.columns[2].y * m.columns[1].w + m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, m.columns[3].x * m.columns[2].y * m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z)); +} + +static spvMat2 spvInverse2(spvMat2 m) { + float d = 1.0f / (m.columns[0].x * m.columns[1].y - m.columns[1].x * m.columns[0].y); + return (spvMat2){ { (float2)(m.columns[1].y * d, -m.columns[0].y * d), (float2)(-m.columns[1].x * d, m.columns[0].x * d) } }; +} + +static spvMat3 spvInverse3(spvMat3 m) { + float3 t = (float3)(m.columns[1].y * m.columns[2].z - m.columns[1].z * m.columns[2].y, m.columns[1].z * m.columns[2].x - m.columns[1].x * m.columns[2].z, m.columns[1].x * m.columns[2].y - m.columns[1].y * m.columns[2].x); + float d = 1.0f / dot(m.columns[0], t); + return (spvMat3){ { t * d, (float3)(m.columns[0].z * m.columns[2].y - m.columns[0].y * m.columns[2].z, m.columns[0].x * m.columns[2].z - m.columns[0].z * m.columns[2].x, m.columns[0].y * m.columns[2].x - m.columns[0].x * m.columns[2].y) * d, (float3)(m.columns[0].y * m.columns[1].z - m.columns[0].z * m.columns[1].y, m.columns[0].z * m.columns[1].x - m.columns[0].x * m.columns[1].z, m.columns[0].x * m.columns[1].y - m.columns[0].y * m.columns[1].x) * d } }; +} + +static spvMat4 spvInverse4(spvMat4 m) { + float4 t = (float4)(m.columns[2].y * m.columns[3].z * m.columns[1].w - m.columns[3].y * m.columns[2].z * m.columns[1].w + m.columns[3].y * m.columns[1].z * m.columns[2].w - m.columns[1].y * m.columns[3].z * m.columns[2].w - m.columns[2].y * m.columns[1].z * m.columns[3].w + m.columns[1].y * m.columns[2].z * m.columns[3].w, m.columns[3].x * 
m.columns[2].z * m.columns[1].w - m.columns[2].x * m.columns[3].z * m.columns[1].w - m.columns[3].x * m.columns[1].z * m.columns[2].w + m.columns[1].x * m.columns[3].z * m.columns[2].w + m.columns[2].x * m.columns[1].z * m.columns[3].w - m.columns[1].x * m.columns[2].z * m.columns[3].w, m.columns[2].x * m.columns[3].y * m.columns[1].w - m.columns[3].x * m.columns[2].y * m.columns[1].w + m.columns[3].x * m.columns[1].y * m.columns[2].w - m.columns[1].x * m.columns[3].y * m.columns[2].w - m.columns[2].x * m.columns[1].y * m.columns[3].w + m.columns[1].x * m.columns[2].y * m.columns[3].w, m.columns[3].x * m.columns[2].y * m.columns[1].z - m.columns[2].x * m.columns[3].y * m.columns[1].z - m.columns[3].x * m.columns[1].y * m.columns[2].z + m.columns[1].x * m.columns[3].y * m.columns[2].z + m.columns[2].x * m.columns[1].y * m.columns[3].z - m.columns[1].x * m.columns[2].y * m.columns[3].z); + spvMat4 r = (spvMat4){ { (float4)(t.x, m.columns[3].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[0].z * m.columns[2].w + m.columns[0].y * m.columns[3].z * m.columns[2].w + m.columns[2].y * m.columns[0].z * m.columns[3].w - m.columns[0].y * m.columns[2].z * m.columns[3].w, m.columns[1].y * m.columns[3].z * m.columns[0].w - m.columns[3].y * m.columns[1].z * m.columns[0].w + m.columns[3].y * m.columns[0].z * m.columns[1].w - m.columns[0].y * m.columns[3].z * m.columns[1].w - m.columns[1].y * m.columns[0].z * m.columns[3].w + m.columns[0].y * m.columns[1].z * m.columns[3].w, m.columns[2].y * m.columns[1].z * m.columns[0].w - m.columns[1].y * m.columns[2].z * m.columns[0].w - m.columns[2].y * m.columns[0].z * m.columns[1].w + m.columns[0].y * m.columns[2].z * m.columns[1].w + m.columns[1].y * m.columns[0].z * m.columns[2].w - m.columns[0].y * m.columns[1].z * m.columns[2].w), (float4)(t.y, m.columns[2].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[2].z * m.columns[0].w + m.columns[3].x * 
m.columns[0].z * m.columns[2].w - m.columns[0].x * m.columns[3].z * m.columns[2].w - m.columns[2].x * m.columns[0].z * m.columns[3].w + m.columns[0].x * m.columns[2].z * m.columns[3].w, m.columns[3].x * m.columns[1].z * m.columns[0].w - m.columns[1].x * m.columns[3].z * m.columns[0].w - m.columns[3].x * m.columns[0].z * m.columns[1].w + m.columns[0].x * m.columns[3].z * m.columns[1].w + m.columns[1].x * m.columns[0].z * m.columns[3].w - m.columns[0].x * m.columns[1].z * m.columns[3].w, m.columns[1].x * m.columns[2].z * m.columns[0].w - m.columns[2].x * m.columns[1].z * m.columns[0].w + m.columns[2].x * m.columns[0].z * m.columns[1].w - m.columns[0].x * m.columns[2].z * m.columns[1].w - m.columns[1].x * m.columns[0].z * m.columns[2].w + m.columns[0].x * m.columns[1].z * m.columns[2].w), (float4)(t.z, m.columns[3].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[0].y * m.columns[2].w + m.columns[0].x * m.columns[3].y * m.columns[2].w + m.columns[2].x * m.columns[0].y * m.columns[3].w - m.columns[0].x * m.columns[2].y * m.columns[3].w, m.columns[1].x * m.columns[3].y * m.columns[0].w - m.columns[3].x * m.columns[1].y * m.columns[0].w + m.columns[3].x * m.columns[0].y * m.columns[1].w - m.columns[0].x * m.columns[3].y * m.columns[1].w - m.columns[1].x * m.columns[0].y * m.columns[3].w + m.columns[0].x * m.columns[1].y * m.columns[3].w, m.columns[2].x * m.columns[1].y * m.columns[0].w - m.columns[1].x * m.columns[2].y * m.columns[0].w - m.columns[2].x * m.columns[0].y * m.columns[1].w + m.columns[0].x * m.columns[2].y * m.columns[1].w + m.columns[1].x * m.columns[0].y * m.columns[2].w - m.columns[0].x * m.columns[1].y * m.columns[2].w), (float4)(t.w, m.columns[2].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[2].y * m.columns[0].z + m.columns[3].x * m.columns[0].y * m.columns[2].z - m.columns[0].x * m.columns[3].y * m.columns[2].z - m.columns[2].x * m.columns[0].y * m.columns[3].z + 
m.columns[0].x * m.columns[2].y * m.columns[3].z, m.columns[3].x * m.columns[1].y * m.columns[0].z - m.columns[1].x * m.columns[3].y * m.columns[0].z - m.columns[3].x * m.columns[0].y * m.columns[1].z + m.columns[0].x * m.columns[3].y * m.columns[1].z + m.columns[1].x * m.columns[0].y * m.columns[3].z - m.columns[0].x * m.columns[1].y * m.columns[3].z, m.columns[1].x * m.columns[2].y * m.columns[0].z - m.columns[2].x * m.columns[1].y * m.columns[0].z + m.columns[2].x * m.columns[0].y * m.columns[1].z - m.columns[0].x * m.columns[2].y * m.columns[1].z - m.columns[1].x * m.columns[0].y * m.columns[2].z + m.columns[0].x * m.columns[1].y * m.columns[2].z) } }; + float d = 1.0f / dot(m.columns[0], t); + r.columns[0] *= d; r.columns[1] *= d; r.columns[2] *= d; r.columns[3] *= d; + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _19) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _19->res = round(_19->f32[0u]); + _19->res = rint(_19->f32[0u]); + _19->res = trunc(_19->f32[0u]); + _19->res = fabs(_19->f32[0u]); + _19->ires = as_int(abs(_19->s32[0u])); + _19->res = sign(_19->f32[0u]); + _19->ires = clamp(_19->s32[0u], -1, 1); + _19->res = floor(_19->f32[0u]); + _19->res = ceil(_19->f32[0u]); + _19->res = (_19->f32[0u] - floor(_19->f32[0u])); + _19->res = radians(_19->f32[0u]); + _19->res = degrees(_19->f32[0u]); + _19->res = sin(_19->f32[0u]); + _19->res = cos(_19->f32[0u]); + _19->res = tan(_19->f32[0u]); + _19->res = asin(_19->f32[0u]); + _19->res = acos(_19->f32[0u]); + _19->res = atan(_19->f32[0u]); + _19->res = sinh(_19->f32[0u]); + _19->res = cosh(_19->f32[0u]); + _19->res = tanh(_19->f32[0u]); + _19->res = asinh(_19->f32[0u]); + _19->res = acosh(_19->f32[0u]); + _19->res = atanh(_19->f32[0u]); + _19->res = atan2(_19->f32[0u], _19->f32[1u]); + _19->res = pow(_19->f32[0u], _19->f32[1u]); + _19->res = exp(_19->f32[0u]); + _19->res = log(_19->f32[0u]); + _19->res = 
exp2(_19->f32[0u]); + _19->res = log2(_19->f32[0u]); + _19->res = sqrt(_19->f32[0u]); + _19->res = rsqrt(_19->f32[0u]); + _19->res = fabs(_19->f32[0u]); + _19->res = fabs(_19->f32[0u] - _19->f32[1u]); + _19->res = sign(_19->f32[0u]); + _19->res = (_19->f32[2u] * _19->f32[1u] < 0.0f ? _19->f32[0u] : -_19->f32[0u]); + _19->res = _19->f32[0u] - 2.0f * _19->f32[1u] * _19->f32[0u] * _19->f32[1u]; + float _195 = (float)(0.0f); + { + float spv_NdotI = _19->f32[1u] * _19->f32[0u]; + float spv_k = 1.0f - _19->f32[2u] * _19->f32[2u] * (1.0f - spv_NdotI * spv_NdotI); + if (spv_k >= 0.0f) + _195 = _19->f32[2u] * _19->f32[0u] - (_19->f32[2u] * spv_NdotI + sqrt(spv_k)) * _19->f32[1u]; + } + _19->res = _195; + _19->res = length(_19->f32.xy); + _19->res = distance(_19->f32.xy, _19->f32.zw); + float2 v2_1 = normalize(_19->f32.xy); + v2_1 = (dot(_19->f32.zw, _19->f32.yz) < 0.0f ? _19->f32.xy : -_19->f32.xy); + v2_1 = _19->f32.xy - 2.0f * dot(_19->f32.zw, _19->f32.xy) * _19->f32.zw; + float2 _243 = (float2)(0.0f); + { + float spv_NdotI = dot(_19->f32.yz, _19->f32.xy); + float spv_k = 1.0f - _19->f32[3u] * _19->f32[3u] * (1.0f - spv_NdotI * spv_NdotI); + if (spv_k >= 0.0f) + _243 = _19->f32[3u] * _19->f32.xy - (_19->f32[3u] * spv_NdotI + sqrt(spv_k)) * _19->f32.yz; + } + v2_1 = _243; + float3 v3_1 = cross(_19->f32.xyz, _19->f32.yzw); + _19->res = spvDeterminant2(_19->m2); + _19->res = spvDeterminant3(_19->m3); + _19->res = spvDeterminant4(_19->m4); + _19->m2 = spvInverse2(_19->m2); + _19->m3 = spvInverse3(_19->m3); + _19->m4 = spvInverse4(_19->m4); + ResType _288; + _288._m0 = modf(_19->f32[0u], &_288._m1); + float tmp_1 = _288._m1; + _19->res = _288._m0; + _19->res = fmin(_19->f32[0u], _19->f32[1u]); + _19->ures = min(_19->u32[0u], _19->u32[1u]); + _19->ires = min(_19->s32[0u], _19->s32[1u]); + _19->res = fmax(_19->f32[0u], _19->f32[1u]); + _19->ures = max(_19->u32[0u], _19->u32[1u]); + _19->ires = max(_19->s32[0u], _19->s32[1u]); + _19->res = clamp(_19->f32[0u], _19->f32[1u], 
_19->f32[2u]); + _19->ures = clamp(_19->u32[0u], _19->u32[1u], _19->u32[2u]); + _19->ires = clamp(_19->s32[0u], _19->s32[1u], _19->s32[2u]); + _19->res = mix(_19->f32[0u], _19->f32[1u], _19->f32[2u]); + _19->res = step(_19->f32[0u], _19->f32[1u]); + _19->res = smoothstep(_19->f32[0u], _19->f32[1u], _19->f32[2u]); + _19->res = fma(_19->f32[0u], _19->f32[1u], _19->f32[2u]); + ResType_1 _390; + _390._m0 = frexp(_19->f32[0u], &_390._m1); + int itmp_1 = _390._m1; + _19->res = _390._m0; + _19->res = ldexp(_19->f32[0u], itmp_1); + _19->ures = spvPackSnorm4x8(_19->f32); + _19->ures = spvPackUnorm4x8(_19->f32); + _19->ures = spvPackSnorm2x16(_19->f32.xy); + _19->ures = spvPackUnorm2x16(_19->f32.xy); + _19->ures = spvPackHalf2x16(_19->f32.xy); + v2_1 = spvUnpackSnorm2x16(_19->u32[0u]); + v2_1 = spvUnpackUnorm2x16(_19->u32[0u]); + v2_1 = spvUnpackHalf2x16(_19->u32[0u]); + float4 v4_1 = spvUnpackSnorm4x8(_19->u32[0u]); + v4_1 = spvUnpackUnorm4x8(_19->u32[0u]); + _19->s32 = (int4)(spvFindLSB(as_uint(_19->s32.x)), spvFindLSB(as_uint(_19->s32.y)), spvFindLSB(as_uint(_19->s32.z)), spvFindLSB(as_uint(_19->s32.w))); + _19->s32 = (int4)(spvFindLSB(_19->u32.x), spvFindLSB(_19->u32.y), spvFindLSB(_19->u32.z), spvFindLSB(_19->u32.w)); + _19->s32 = 31 - clz(_19->s32 ^ (_19->s32 >> 31)); + _19->s32 = 31 - as_int4(clz(_19->u32)); +} + diff --git a/reference/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp b/reference/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 000000000..7dde55fc7 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Foo +{ + float _abs; +}; + +typedef struct Foo Foo; + +struct SSBO +{ + Foo foo; + Foo foo2; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _9) +{ + Foo f; + f._abs = _9->foo._abs; + int _abs = 10; + _9->foo2._abs = f._abs; 
+} + diff --git a/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp new file mode 100644 index 000000000..7ee5a5f89 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp @@ -0,0 +1,89 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct BUF0 +{ + half2 f16s; + ushort2 u16; + short2 i16; + ushort4 u16s; + short4 i16s; + half f16; +}; + +typedef struct BUF0 BUF0; + +void test_u16(__global BUF0* _24) +{ + _24->f16 += as_half(ushort(_24->u16[0u] + _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] - _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] * _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] / _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] % _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] << _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] >> _24->u16[1u])); + _24->f16 += as_half(ushort(~_24->u16[0u])); + _24->f16 += as_half(ushort(-_24->u16[0u])); + _24->f16 += as_half(ushort(_24->u16[0u] ^ _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] & _24->u16[1u])); + _24->f16 += as_half(ushort(_24->u16[0u] | _24->u16[1u])); +} + +void test_i16(__global BUF0* _24) +{ + _24->f16 += as_half(short(_24->i16[0u] + _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] - _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] * _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] / _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] % _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] << _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] >> _24->i16[1u])); + _24->f16 += as_half(short(~_24->i16[0u])); + _24->f16 += as_half(short(-_24->i16[0u])); + _24->f16 += as_half(short(_24->i16[0u] ^ _24->i16[1u])); + _24->f16 += as_half(short(_24->i16[0u] & _24->i16[1u])); + 
_24->f16 += as_half(short(_24->i16[0u] | _24->i16[1u])); +} + +void test_u16s(__global BUF0* _24) +{ + _24->f16s += as_half2(_24->u16s.xy + _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy - _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy * _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy / _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy % _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy << _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy >> _24->u16s.zw); + _24->f16s += as_half2(~_24->u16s.xy); + _24->f16s += as_half2(-_24->u16s.xy); + _24->f16s += as_half2(_24->u16s.xy ^ _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy & _24->u16s.zw); + _24->f16s += as_half2(_24->u16s.xy | _24->u16s.zw); +} + +void test_i16s(__global BUF0* _24) +{ + _24->f16s += as_half2(_24->i16s.xy + _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy - _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy * _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy / _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy % _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy << _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy >> _24->i16s.zw); + _24->f16s += as_half2(~_24->i16s.xy); + _24->f16s += as_half2(-_24->i16s.xy); + _24->f16s += as_half2(_24->i16s.xy ^ _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy & _24->i16s.zw); + _24->f16s += as_half2(_24->i16s.xy | _24->i16s.zw); +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global BUF0* _24) +{ + test_u16(_24); + test_i16(_24); + test_u16s(_24); + test_i16s(_24); +} + diff --git a/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp new file mode 100644 index 000000000..4d3324d9e --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp @@ -0,0 +1,27 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct 
UBO +{ + half b; +}; + +typedef struct UBO UBO; + +struct SSBO +{ + half a; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(UBO _12, __global half* _24) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + short v = as_short(_12.b); + v = short(v ^ (-32768s)); + _24[0] = as_half(v); +} + diff --git a/reference/shaders-opencl-no-opt/comp/int64.invalid.comp b/reference/shaders-opencl-no-opt/comp/int64.invalid.comp new file mode 100644 index 000000000..13fc8bf8b --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/int64.invalid.comp @@ -0,0 +1,75 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct M0 +{ + long v; + long2 b[2]; + ulong c; + ulong d[5]; +}; + +typedef struct M0 M0; + +struct SSBO0_Type +{ + long4 a; + M0 m0; +}; + +typedef struct SSBO0_Type SSBO0_Type; + +struct SSBO1_Type +{ + ulong4 b; + M0 m0; +}; + +typedef struct SSBO1_Type SSBO1_Type; + +struct SSBO2_Type +{ + long a[4]; + long2 b[4]; +}; + +typedef struct SSBO2_Type SSBO2_Type; + +struct SSBO3_Type +{ + long a[4]; + long2 b[4]; +}; + +typedef struct SSBO3_Type SSBO3_Type; + +struct SSBO +{ + int s32; + uint u32; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _96) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + SSBO0_Type ssbo_0; + ssbo_0.a += (long4)(10l, 20l, 30l, 40l); + SSBO1_Type ssbo_1; + ssbo_1.b += (ulong4)(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += (long4)(20l); + ssbo_0.a = as_long4(abs(ssbo_0.a + as_long4(ssbo_1.b))); + ssbo_0.a += (long4)(1l); + ssbo_1.b += as_ulong4((long4)(1l)); + ssbo_0.a -= (long4)(1l); + ssbo_1.b -= as_ulong4((long4)(1l)); + SSBO2_Type ssbo_2; + ssbo_2.a[0] += 1l; + SSBO3_Type ssbo_3; + ssbo_3.a[0] += 2l; + _96->s32 = 
as_int(convert_uint(((as_ulong(ssbo_0.a.x) + ssbo_1.b.y) + as_ulong(ssbo_2.a[1])) + as_ulong(ssbo_3.a[2]))); + _96->u32 = convert_uint(((as_ulong(ssbo_0.a.y) + ssbo_1.b.z) + as_ulong(ssbo_2.a[0])) + as_ulong(ssbo_3.a[1])); +} + diff --git a/reference/shaders-opencl-no-opt/comp/int64min-literal.comp b/reference/shaders-opencl-no-opt/comp/int64min-literal.comp new file mode 100644 index 000000000..1697efd5b --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/int64min-literal.comp @@ -0,0 +1,26 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct UBO +{ + float b; +}; + +typedef struct UBO UBO; + +struct SSBO +{ + float a; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(UBO _12, __global float* _25) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + long v = convert_long(as_int(_12.b)); + v ^= (long)(0x8000000000000000ul); + _25[0] = as_float(convert_int(v)); +} + diff --git a/reference/shaders-opencl-no-opt/comp/integer-dot-product.comp b/reference/shaders-opencl-no-opt/comp/integer-dot-product.comp new file mode 100644 index 000000000..a76cd28a7 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/integer-dot-product.comp @@ -0,0 +1,58 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct InOut3 +{ + ushort4 x; + ushort4 y; + int acc; + int result; +}; + +typedef struct InOut3 InOut3; + +struct InOut2 +{ + uint x; + uint y; + uint result; +}; + +typedef struct InOut2 InOut2; + +struct InOut +{ + uint4 x; + uint4 y; + int result; +}; + +typedef struct InOut InOut; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global InOut3* comp3, __global InOut2* comp2, __global InOut* comp) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int sdot_int = (int)(as_short4(comp3->x).s0) * (int)(as_short4(comp3->y).s0) + (int)(as_short4(comp3->x).s1) * 
(int)(as_short4(comp3->y).s1) + (int)(as_short4(comp3->x).s2) * (int)(as_short4(comp3->y).s2) + (int)(as_short4(comp3->x).s3) * (int)(as_short4(comp3->y).s3); + uint sdot_uint = (uint)(as_short4(comp3->x).s0) * (uint)(as_short4(comp3->y).s0) + (uint)(as_short4(comp3->x).s1) * (uint)(as_short4(comp3->y).s1) + (uint)(as_short4(comp3->x).s2) * (uint)(as_short4(comp3->y).s2) + (uint)(as_short4(comp3->x).s3) * (uint)(as_short4(comp3->y).s3); + uint udot_uint = (uint)(comp3->x.s0) * (uint)(comp3->y.s0) + (uint)(comp3->x.s1) * (uint)(comp3->y.s1) + (uint)(comp3->x.s2) * (uint)(comp3->y.s2) + (uint)(comp3->x.s3) * (uint)(comp3->y.s3); + int sudot_int = (int)(as_short4(comp3->x).s0) * (int)(comp3->y.s0) + (int)(as_short4(comp3->x).s1) * (int)(comp3->y.s1) + (int)(as_short4(comp3->x).s2) * (int)(comp3->y.s2) + (int)(as_short4(comp3->x).s3) * (int)(comp3->y.s3); + uint sudot_uint = (uint)(as_short4(comp3->x).s0) * (uint)(comp3->y.s0) + (uint)(as_short4(comp3->x).s1) * (uint)(comp3->y.s1) + (uint)(as_short4(comp3->x).s2) * (uint)(comp3->y.s2) + (uint)(as_short4(comp3->x).s3) * (uint)(comp3->y.s3); + uchar spdot8 = (uchar)(as_char4(comp2->x).s0) * (uchar)(as_char4(comp2->y).s0) + (uchar)(as_char4(comp2->x).s1) * (uchar)(as_char4(comp2->y).s1) + (uchar)(as_char4(comp2->x).s2) * (uchar)(as_char4(comp2->y).s2) + (uchar)(as_char4(comp2->x).s3) * (uchar)(as_char4(comp2->y).s3); + ushort spdot16 = (ushort)(as_char4(comp2->x).s0) * (ushort)(as_char4(comp2->y).s0) + (ushort)(as_char4(comp2->x).s1) * (ushort)(as_char4(comp2->y).s1) + (ushort)(as_char4(comp2->x).s2) * (ushort)(as_char4(comp2->y).s2) + (ushort)(as_char4(comp2->x).s3) * (ushort)(as_char4(comp2->y).s3); + uint spdot32 = (uint)(as_char4(comp2->x).s0) * (uint)(as_char4(comp2->y).s0) + (uint)(as_char4(comp2->x).s1) * (uint)(as_char4(comp2->y).s1) + (uint)(as_char4(comp2->x).s2) * (uint)(as_char4(comp2->y).s2) + (uint)(as_char4(comp2->x).s3) * (uint)(as_char4(comp2->y).s3); + int spdoti32 = (int)(as_char4(comp2->x).s0) * 
(int)(as_char4(comp2->y).s0) + (int)(as_char4(comp2->x).s1) * (int)(as_char4(comp2->y).s1) + (int)(as_char4(comp2->x).s2) * (int)(as_char4(comp2->y).s2) + (int)(as_char4(comp2->x).s3) * (int)(as_char4(comp2->y).s3); + uchar updot8 = (uchar)(as_uchar4(comp2->x).s0) * (uchar)(as_uchar4(comp2->y).s0) + (uchar)(as_uchar4(comp2->x).s1) * (uchar)(as_uchar4(comp2->y).s1) + (uchar)(as_uchar4(comp2->x).s2) * (uchar)(as_uchar4(comp2->y).s2) + (uchar)(as_uchar4(comp2->x).s3) * (uchar)(as_uchar4(comp2->y).s3); + ushort updot16 = (ushort)(as_uchar4(comp2->x).s0) * (ushort)(as_uchar4(comp2->y).s0) + (ushort)(as_uchar4(comp2->x).s1) * (ushort)(as_uchar4(comp2->y).s1) + (ushort)(as_uchar4(comp2->x).s2) * (ushort)(as_uchar4(comp2->y).s2) + (ushort)(as_uchar4(comp2->x).s3) * (ushort)(as_uchar4(comp2->y).s3); + uint updot32 = (uint)(as_uchar4(comp2->x).s0) * (uint)(as_uchar4(comp2->y).s0) + (uint)(as_uchar4(comp2->x).s1) * (uint)(as_uchar4(comp2->y).s1) + (uint)(as_uchar4(comp2->x).s2) * (uint)(as_uchar4(comp2->y).s2) + (uint)(as_uchar4(comp2->x).s3) * (uint)(as_uchar4(comp2->y).s3); + uchar supdot8 = (uchar)(as_char4(comp2->x).s0) * (uchar)(as_uchar4(comp2->y).s0) + (uchar)(as_char4(comp2->x).s1) * (uchar)(as_uchar4(comp2->y).s1) + (uchar)(as_char4(comp2->x).s2) * (uchar)(as_uchar4(comp2->y).s2) + (uchar)(as_char4(comp2->x).s3) * (uchar)(as_uchar4(comp2->y).s3); + ushort supdot16 = (ushort)(as_char4(comp2->x).s0) * (ushort)(as_uchar4(comp2->y).s0) + (ushort)(as_char4(comp2->x).s1) * (ushort)(as_uchar4(comp2->y).s1) + (ushort)(as_char4(comp2->x).s2) * (ushort)(as_uchar4(comp2->y).s2) + (ushort)(as_char4(comp2->x).s3) * (ushort)(as_uchar4(comp2->y).s3); + uint supdot32 = (uint)(as_char4(comp2->x).s0) * (uint)(as_uchar4(comp2->y).s0) + (uint)(as_char4(comp2->x).s1) * (uint)(as_uchar4(comp2->y).s1) + (uint)(as_char4(comp2->x).s2) * (uint)(as_uchar4(comp2->y).s2) + (uint)(as_char4(comp2->x).s3) * (uint)(as_uchar4(comp2->y).s3); + int supdoti32 = (int)(as_char4(comp2->x).s0) * 
(int)(as_uchar4(comp2->y).s0) + (int)(as_char4(comp2->x).s1) * (int)(as_uchar4(comp2->y).s1) + (int)(as_char4(comp2->x).s2) * (int)(as_uchar4(comp2->y).s2) + (int)(as_char4(comp2->x).s3) * (int)(as_uchar4(comp2->y).s3); + int sdotaddsat_int = (int)add_sat((int)(as_short4(comp3->x).s0) * (int)(as_short4(comp3->y).s0) + (int)(as_short4(comp3->x).s1) * (int)(as_short4(comp3->y).s1) + (int)(as_short4(comp3->x).s2) * (int)(as_short4(comp3->y).s2) + (int)(as_short4(comp3->x).s3) * (int)(as_short4(comp3->y).s3), comp3->acc); + uint sdotaddsat_uint = (uint)add_sat((int)(as_short4(comp3->x).s0) * (int)(as_short4(comp3->y).s0) + (int)(as_short4(comp3->x).s1) * (int)(as_short4(comp3->y).s1) + (int)(as_short4(comp3->x).s2) * (int)(as_short4(comp3->y).s2) + (int)(as_short4(comp3->x).s3) * (int)(as_short4(comp3->y).s3), comp3->acc); + uint udotaddsat_uint = (uint)add_sat((uint)(comp3->x.s0) * (uint)(comp3->y.s0) + (uint)(comp3->x.s1) * (uint)(comp3->y.s1) + (uint)(comp3->x.s2) * (uint)(comp3->y.s2) + (uint)(comp3->x.s3) * (uint)(comp3->y.s3), as_uint(comp3->acc)); + int sudotaddsat_int = (int)add_sat((int)(as_short4(comp3->x).s0) * (int)(comp3->y.s0) + (int)(as_short4(comp3->x).s1) * (int)(comp3->y.s1) + (int)(as_short4(comp3->x).s2) * (int)(comp3->y.s2) + (int)(as_short4(comp3->x).s3) * (int)(comp3->y.s3), comp3->acc); + uint sudotaddsat_uint = (uint)add_sat((int)(as_short4(comp3->x).s0) * (int)(comp3->y.s0) + (int)(as_short4(comp3->x).s1) * (int)(comp3->y.s1) + (int)(as_short4(comp3->x).s2) * (int)(comp3->y.s2) + (int)(as_short4(comp3->x).s3) * (int)(comp3->y.s3), comp3->acc); +} + diff --git a/reference/shaders-opencl-no-opt/comp/intmin-literal.comp b/reference/shaders-opencl-no-opt/comp/intmin-literal.comp new file mode 100644 index 000000000..78eca3f61 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a; +}; + +typedef struct SSBO SSBO; + +struct 
UBO +{ + float b; +}; + +typedef struct UBO UBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float* _9, UBO _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9[0] = as_float(as_int(_14.b) ^ (int)(0x80000000)); +} + diff --git a/reference/shaders-opencl-no-opt/comp/loop.comp b/reference/shaders-opencl-no-opt/comp/loop.comp new file mode 100644 index 000000000..84472618d --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/loop.comp @@ -0,0 +1,100 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[4]; } spvMat4; + +struct SSBO +{ + spvMat4 mvp; + float4 in_data[1]; +}; + +typedef struct SSBO SSBO; + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +static float4 spvMulMat4Vec4(spvMat4 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global const SSBO* _24, __global float4* _177) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident_1 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + float4 idat_1 = _24->in_data[ident_1]; + int k_1 = 0; + uint i_2 = 0u; + if (idat_1.y == 20.0f) + { + do + { + k_1 *= 2; + i_2 += as_uint(1); + } while (i_2 < ident_1); + } + switch (k_1) + { + case 10: + { + for (;;) + { + i_2 += as_uint(1); + if (i_2 > 10u) + { + break; + } + continue; + } + break; + } + default: + { + for (;;) + { + i_2 += 2u; + if (i_2 > 20u) + { + break; + } + continue; + } + break; + } + } + while (k_1 < 10) + { + idat_1 *= 2.0f; + k_1++; + } + for (uint i_1_1 = 0u; i_1_1 < 16u; i_1_1 += as_uint(1), k_1++) + { + for (uint j_1 = 0u; j_1 < 30u; j_1 += as_uint(1)) + { + idat_1 = spvMulMat4Vec4(_24->mvp, idat_1); + } + } + k_1 = 0; + for (;;) + { + k_1++; + if (k_1 > 10) + { + k_1 += 2; + } 
+ else + { + k_1 += 3; + continue; + } + k_1 += 10; + continue; + } +} + diff --git a/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp b/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp new file mode 100644 index 000000000..0e8f8174f --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp @@ -0,0 +1,17 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + uint val; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global uint* _9, write_only image2d_t img) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9[0] = read_imageui(img, (int2)(10)).x; +} + diff --git a/reference/shaders-opencl-no-opt/comp/return.comp b/reference/shaders-opencl-no-opt/comp/return.comp new file mode 100644 index 000000000..1f5bb5cb6 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/return.comp @@ -0,0 +1,39 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO2 +{ + float4 out_data[1]; +}; + +typedef struct SSBO2 SSBO2; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float4* _27) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + if (ident == 2u) + { + _27[ident] = (float4)(20.0f); + } + else + { + if (ident == 4u) + { + _27[ident] = (float4)(10.0f); + return; + } + } + int i = 0; + while (i < 20) + { + if (i == 10) + { + break; + } + return; + } + _27[ident] = (float4)(10.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp b/reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp new file mode 100644 index 
000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp b/reference/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 000000000..ba07a77f0 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,18 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float a[16]; + float4 b[16]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _14->b[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x] = (float4)(_14->a[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x]); +} + diff --git a/reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp new file mode 100644 index 000000000..ae9db77c3 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -0,0 +1,144 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float2 columns[2]; } spvMat2; +typedef struct { float3 columns[2]; } spvMat2x3; +typedef struct { float2 columns[3]; } spvMat3x2; + +struct S0 +{ + float2 a[1]; + float b; +}; + +typedef struct S0 S0; + +struct S1 +{ + float3 a; + float b; +}; + +typedef struct S1 S1; + +struct S2 +{ + float3 a[1]; + float b; +}; + +typedef struct S2 S2; + +struct S3 +{ + float2 a; + float b; +}; + +typedef struct S3 S3; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; +}; + +typedef struct Content Content; + +struct SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + spvMat2 m0; + spvMat2 m1; + spvMat2x3 m2[4]; + spvMat3x2 m3; + 
spvMat2 m4; + spvMat2 m5[9]; + spvMat3x2 m6[4][2]; + spvMat2x3 m7; + float array[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + spvMat2 m0; + spvMat2 m1; + spvMat2x3 m2[4]; + spvMat3x2 m3; + spvMat2 m4; + spvMat2 m5[9]; + spvMat3x2 m6[4][2]; + spvMat2x3 m7; + float array[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO2 +{ + float m0; + spvMat2 m1; + spvMat2x3 m2; +}; + +typedef struct SSBO2 SSBO2; + +static float3 spvMulMat2x3Vec2(spvMat2x3 m, float2 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y; +} + +static spvMat2 spvTransposeMat2(spvMat2 m) +{ + spvMat2 r; + r.columns[0] = (float2)(m.columns[0].x, m.columns[1].x); + r.columns[1] = (float2)(m.columns[0].y, m.columns[1].y); + return r; +} + +static spvMat2x3 spvTransposeMat3x2(spvMat3x2 m) +{ + spvMat2x3 r; + r.columns[0] = (float3)(m.columns[0].x, m.columns[1].x, m.columns[2].x); + r.columns[1] = (float3)(m.columns[0].y, m.columns[1].y, m.columns[2].y); + return r; +} + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO1* ssbo_scalar, __global SSBO0* ssbo_140, __global SSBO2* ssbo_scalar2) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + ssbo_scalar->content.m0s[0].a[0] = ssbo_140->content.m0s[0].a[0]; + ssbo_scalar->content.m0s[0].b = ssbo_140->content.m0s[0].b; + ssbo_scalar->content.m1s[0].a = ssbo_140->content.m1s[0].a; + ssbo_scalar->content.m1s[0].b = ssbo_140->content.m1s[0].b; + ssbo_scalar->content.m2s[0].a[0] = ssbo_140->content.m2s[0].a[0]; + ssbo_scalar->content.m2s[0].b = ssbo_140->content.m2s[0].b; + ssbo_scalar->content.m0.a[0] = ssbo_140->content.m0.a[0]; + ssbo_scalar->content.m0.b = ssbo_140->content.m0.b; + ssbo_scalar->content.m1.a = ssbo_140->content.m1.a; + ssbo_scalar->content.m1.b = ssbo_140->content.m1.b; + ssbo_scalar->content.m2.a[0] = ssbo_140->content.m2.a[0]; + ssbo_scalar->content.m2.b = 
ssbo_140->content.m2.b; + ssbo_scalar->content.m3.a = ssbo_140->content.m3.a; + ssbo_scalar->content.m3.b = ssbo_140->content.m3.b; + ssbo_scalar->content.m4 = ssbo_140->content.m4; + ssbo_scalar->content.m1.a = spvMulMat2x3Vec2(ssbo_scalar->m2[1], ssbo_scalar->content.m0.a[0]); + ssbo_scalar->m0 = ssbo_scalar2->m1; + ssbo_scalar2->m1 = spvTransposeMat2(ssbo_scalar->m4); + ssbo_scalar2->m2 = spvTransposeMat3x2(ssbo_scalar->m3); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp new file mode 100644 index 000000000..e69de29bb diff --git a/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp b/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp new file mode 100644 index 000000000..dcc01cef3 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO +{ + float v[4]; + half f16[4]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(4, 1, 
1))) +__kernel void comp_main(__global SSBO* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += tan(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += acos(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += asin(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += atan(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + 
get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp2(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log2(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sqrt(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += rsqrt(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += pow(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))], 4.0f); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + 
_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += cosh(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sinh(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); +} + diff --git a/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp b/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp new file mode 100644 index 000000000..dcc01cef3 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp @@ -0,0 +1,35 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO +{ + float v[4]; + half f16[4]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(4, 1, 1))) +__kernel void comp_main(__global SSBO* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_14->v[((uint)(get_local_id(2) * 
get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += tan(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += acos(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += asin(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += atan(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += exp2(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); 
+ _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += log2(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sqrt(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += rsqrt(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += pow(_14->v[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))], 4.0f); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = cos(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sin(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] += cosh(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); + _14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + 
get_local_id(1) * get_local_size(0) + get_local_id(0)))] += sinh(_14->f16[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]); +} + diff --git a/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp b/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp new file mode 100644 index 000000000..be55a5acb --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp @@ -0,0 +1,56 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[3]; } spvMat3x4; +typedef struct { float3 columns[4]; } spvMat4x3; + +struct SSBO +{ + spvMat3x4 A; + spvMat3x4 B; + spvMat3x4 C; + float4 D; + float w0; + float w1; +}; + +typedef struct SSBO SSBO; + +static float3 spvMulMat4x3Vec4(spvMat4x3 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat3x4 spvMulMat3x4Scalar(spvMat3x4 m, float s) +{ + spvMat3x4 r; + r.columns[0] = m.columns[0] * s; + r.columns[1] = m.columns[1] * s; + r.columns[2] = m.columns[2] * s; + return r; +} + +static spvMat4x3 spvTransposeMat3x4(spvMat3x4 m) +{ + spvMat4x3 r; + r.columns[0] = (float3)(m.columns[0].x, m.columns[1].x, m.columns[2].x); + r.columns[1] = (float3)(m.columns[0].y, m.columns[1].y, m.columns[2].y); + r.columns[2] = (float3)(m.columns[0].z, m.columns[1].z, m.columns[2].z); + r.columns[3] = (float3)(m.columns[0].w, m.columns[1].w, m.columns[2].w); + return r; +} + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global SSBO* _18) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + spvMat4x3 Anew_1; + spvMat4x3 Bnew_1; + do + { + Anew_1 = spvTransposeMat3x4(spvMulMat3x4Scalar(_18->A, _18->w0)); + Bnew_1 = spvTransposeMat3x4(spvMulMat3x4Scalar(_18->B, _18->w1)); + } while (false); + _18->D = 
(float4)(spvMulMat4x3Vec4((spvMat4x3){ { Anew_1.columns[0] + Bnew_1.columns[0], Anew_1.columns[1] + Bnew_1.columns[1], Anew_1.columns[2] + Bnew_1.columns[2], Anew_1.columns[3] + Bnew_1.columns[3] } }, _18->D), 1.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp b/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp new file mode 100644 index 000000000..669835c16 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp @@ -0,0 +1,41 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float4 columns[3]; } spvMat3x4; +typedef struct { float3 columns[4]; } spvMat4x3; + +struct SSBO +{ + spvMat3x4 A; + spvMat3x4 B; + spvMat3x4 C; + float4 D; + float w0; + float w1; +}; + +typedef struct SSBO SSBO; + +static float3 spvMulMat4x3Vec4(spvMat4x3 m, float4 v) +{ + return m.columns[0] * v.x + m.columns[1] * v.y + m.columns[2] * v.z + m.columns[3] * v.w; +} + +static spvMat3x4 spvMulMat3x4Scalar(spvMat3x4 m, float s) +{ + spvMat3x4 r; + r.columns[0] = m.columns[0] * s; + r.columns[1] = m.columns[1] * s; + r.columns[2] = m.columns[2] * s; + return r; +} + +__attribute__((reqd_work_group_size(64, 1, 1))) +__kernel void comp_main(__global SSBO* _12) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + spvMat3x4 _23 = spvMulMat3x4Scalar(_12->A, _12->w0); + spvMat3x4 _30 = spvMulMat3x4Scalar(_12->B, _12->w1); + _12->D = (float4)(spvMulMat4x3Vec4((spvMat4x3){ { (float3)(_23.columns[0][0], _23.columns[1][0], _23.columns[2][0]) + (float3)(_30.columns[0][0], _30.columns[1][0], _30.columns[2][0]), (float3)(_23.columns[0][1], _23.columns[1][1], _23.columns[2][1]) + (float3)(_30.columns[0][1], _30.columns[1][1], _30.columns[2][1]), (float3)(_23.columns[0][2], _23.columns[1][2], _23.columns[2][2]) + (float3)(_30.columns[0][2], _30.columns[1][2], _30.columns[2][2]), (float3)(_23.columns[0][3], _23.columns[1][3], 
_23.columns[2][3]) + (float3)(_30.columns[0][3], _30.columns[1][3], _30.columns[2][3]) } }, _12->D), 1.0f); +} + diff --git a/reference/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp b/reference/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 000000000..6e105e28a --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,19 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct A +{ + float3 a; + float3 b; +}; + +typedef struct A A; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global A* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + int3 c = _14->b < (float3)(1.0f); + _14->a = select((float3)(1.0f, 0.0f, 0.0f), (float3)(0.0f, 0.0f, 1.0f), c); +} + diff --git a/reference/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp b/reference/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 000000000..cf0dbbc85 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,22 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +typedef struct { float3 columns[3]; } spvMat3; + +struct A +{ + spvMat3 a; + float b; +}; + +typedef struct A A; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global A* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + bool c = _14->b < 1.0f; + _14->a = c ? ((spvMat3){ { (float3)(1.0f), (float3)(1.0f), (float3)(1.0f) } }) : ((spvMat3){ { (float3)(0.0f), (float3)(0.0f), (float3)(0.0f) } }); + _14->a = c ? 
((spvMat3){ { (float3)(1.0f, 0.0f, 0.0f), (float3)(0.0f, 1.0f, 0.0f), (float3)(0.0f, 0.0f, 1.0f) } }) : ((spvMat3){ { (float3)(0.0f), (float3)(0.0f), (float3)(0.0f) } }); +} + diff --git a/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp b/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp new file mode 100644 index 000000000..2f0fe4a37 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp @@ -0,0 +1,58 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 2 +#endif +constant int A = SPIRV_CROSS_CONSTANT_ID_1; +#define _20 ((as_uint(A) + 0u)) +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _21 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _22 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 1u +#endif +constant uint _23 = SPIRV_CROSS_CONSTANT_ID_3; +constant uint3 spvWorkgroupSize = (uint3)(_21, _22, _23); +#define _26 (_21) +#define _27 ((_20 * _26)) +#define _31 ((as_uint(A) + 0u)) +#define _32 (_21) +#define _33 ((_31 * _32)) +#define _59 ((as_uint(A) + 0u)) +#define _61 (_23) +#define _62 ((_59 * _61)) + +struct SSBO +{ + int I; + float V; +}; + +typedef struct SSBO SSBO; + +#define _88 ((as_uint(A) + 0u)) +#define _89 (_23) +#define _90 ((_88 * _89)) + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _76) +{ + float D[_33]; + float E[_90]; + for (int i = 0; as_uint(i) < _27; i++) + { + D[i] = 1.0f + convert_float(as_uint(i) + spvWorkgroupSize.y); + } + for (int i_1 = 0; as_uint(i_1) < _62; i_1++) + { + D[i_1] = 1.0f + convert_float(as_uint(i_1) + spvWorkgroupSize.y); + } + _76->V = (D[_76->I] + D[_76->I ^ 1]) + E[_76->I]; +} + diff --git 
a/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp b/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp new file mode 100644 index 000000000..b04d8391b --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp @@ -0,0 +1,70 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _7 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _8 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 1u +#endif +constant uint _9 = SPIRV_CROSS_CONSTANT_ID_3; +#ifndef SPIRV_CROSS_CONSTANT_ID_1 +#define SPIRV_CROSS_CONSTANT_ID_1 2 +#endif +constant int A = SPIRV_CROSS_CONSTANT_ID_1; +#define _23 ((as_uint(A) + 0u)) +#ifndef SPIRV_CROSS_CONSTANT_ID_0 +#define SPIRV_CROSS_CONSTANT_ID_0 1u +#endif +constant uint _24 = SPIRV_CROSS_CONSTANT_ID_0; +#ifndef SPIRV_CROSS_CONSTANT_ID_2 +#define SPIRV_CROSS_CONSTANT_ID_2 1u +#endif +constant uint _25 = SPIRV_CROSS_CONSTANT_ID_2; +#ifndef SPIRV_CROSS_CONSTANT_ID_3 +#define SPIRV_CROSS_CONSTANT_ID_3 1u +#endif +constant uint _26 = SPIRV_CROSS_CONSTANT_ID_3; +constant uint3 _28 = (uint3)(_24, _25, _26); +#define _29 (_24) +#define _30 ((_23 * _29)) +#define _34 ((as_uint(A) + 0u)) +#define _35 (_24) +#define _36 ((_34 * _35)) +#define _62 ((as_uint(A) + 0u)) +#define _64 (_26) +#define _65 ((_62 * _64)) + +struct SSBO +{ + int I; + float V; +}; + +typedef struct SSBO SSBO; + +#define _91 ((as_uint(A) + 0u)) +#define _92 (_26) +#define _93 ((_91 * _92)) + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _79) +{ + float D[_36]; + float E[_93]; + for (int i = 0; as_uint(i) < _30; i++) + { + D[i] = 1.0f + convert_float(as_uint(i) + _28.y); + } + for (int i_1 
= 0; as_uint(i_1) < _65; i_1++) + { + D[i_1] = 1.0f + convert_float(as_uint(i_1) + _28.y); + } + _79->V = (D[_79->I] + D[_79->I ^ 1]) + E[_79->I]; +} + diff --git a/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp b/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp new file mode 100644 index 000000000..87aee2db5 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/aliased-struct-divergent-member-name.asm.comp @@ -0,0 +1,77 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 37 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %T "T" + OpMemberName %T 0 "a" + OpName %v "v" + OpName %T_0 "T" + OpMemberName %T_0 0 "b" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "foo" + OpName %_ "" + OpName %T_1 "T" + OpMemberName %T_1 0 "c" + OpName %SSBO2 "SSBO2" + OpMemberName %SSBO2 0 "bar" + OpName %__0 "" + OpMemberDecorate %T_0 0 Offset 0 + OpDecorate %_runtimearr_T_0 ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %T_1 0 Offset 0 + OpDecorate %_runtimearr_T_1 ArrayStride 16 + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %T = OpTypeStruct %float +%_ptr_Function_T = OpTypePointer Function %T + %float_40 = OpConstant %float 40 + %11 = OpConstantComposite %T %float_40 + %T_0 = OpTypeStruct %float +%_runtimearr_T_0 = OpTypeRuntimeArray %T_0 + %SSBO1 = OpTypeStruct %_runtimearr_T_0 +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_10 
= OpConstant %int 10 +%_ptr_Uniform_T_0 = OpTypePointer Uniform %T_0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %T_1 = OpTypeStruct %float +%_runtimearr_T_1 = OpTypeRuntimeArray %T_1 + %SSBO2 = OpTypeStruct %_runtimearr_T_1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %__0 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int_30 = OpConstant %int 30 +%_ptr_Uniform_T_1 = OpTypePointer Uniform %T_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %v = OpVariable %_ptr_Function_T Function + OpStore %v %11 + %20 = OpLoad %T %v + %22 = OpAccessChain %_ptr_Uniform_T_0 %_ %int_0 %int_10 + %23 = OpCompositeExtract %float %20 0 + %25 = OpAccessChain %_ptr_Uniform_float %22 %int_0 + OpStore %25 %23 + %32 = OpLoad %T %v + %34 = OpAccessChain %_ptr_Uniform_T_1 %__0 %int_0 %int_30 + %35 = OpCompositeExtract %float %32 0 + %36 = OpAccessChain %_ptr_Uniform_float %34 %int_0 + OpStore %36 %35 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp b/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp new file mode 100644 index 000000000..0e1ce235d --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/arithmetic-conversion-signs.asm.comp @@ -0,0 +1,131 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 76 +; Schema: 0 + OpCapability Shader + OpCapability Int16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types_int16" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "s32" + OpMemberName %SSBO 1 "u32" + OpMemberName %SSBO 2 "s16" + OpMemberName %SSBO 3 "u16" + OpMemberName %SSBO 4 "f32" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate 
%SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 10 + OpMemberDecorate %SSBO 4 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %short = OpTypeInt 16 1 + %ushort = OpTypeInt 16 0 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %int %uint %short %ushort %float +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_2 = OpConstant %int 2 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%_ptr_Uniform_short = OpTypePointer Uniform %short + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_3 = OpConstant %int 3 +%_ptr_Uniform_ushort = OpTypePointer Uniform %ushort + %int_4 = OpConstant %int 4 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %ptr_s32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 + %ptr_u32 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %ptr_s16 = OpAccessChain %_ptr_Uniform_short %_ %int_2 + %ptr_u16 = OpAccessChain %_ptr_Uniform_ushort %_ %int_3 + %ptr_f32 = OpAccessChain %_ptr_Uniform_float %_ %int_4 + %s32 = OpLoad %int %ptr_s32 + %u32 = OpLoad %uint %ptr_u32 + %s16 = OpLoad %short %ptr_s16 + %u16 = OpLoad %ushort %ptr_u16 + %f32 = OpLoad %float %ptr_f32 + + ; Sign-extend + %s16_to_s32_signed = OpSConvert %int %s16 + OpStore %ptr_s32 %s16_to_s32_signed + %s16_to_u32_signed = OpSConvert %uint %s16 + OpStore %ptr_u32 %s16_to_u32_signed + + %u16_to_s32_signed = OpSConvert %int %u16 + OpStore %ptr_s32 %u16_to_s32_signed + %u16_to_u32_signed = OpSConvert %uint %u16 + OpStore %ptr_u32 %u16_to_u32_signed + + ; Zero-extend + ; Result must be unsigned for OpUConvert. 
+ ;%s16_to_s32_unsigned = OpUConvert %int %s16 + ;OpStore %ptr_s32 %s16_to_s32_unsigned + %s16_to_u32_unsigned = OpUConvert %uint %s16 + OpStore %ptr_u32 %s16_to_u32_unsigned + + ;%u16_to_s32_unsigned = OpUConvert %int %u16 + ;OpStore %ptr_s32 %u16_to_s32_unsigned + %u16_to_u32_unsigned = OpUConvert %uint %u16 + OpStore %ptr_u32 %u16_to_u32_unsigned + + ; Truncate (SConvert == UConvert) + %s32_to_s16_signed = OpSConvert %short %s32 + OpStore %ptr_s16 %s32_to_s16_signed + %s32_to_u16_signed = OpSConvert %ushort %s32 + OpStore %ptr_u16 %s32_to_u16_signed + + %u32_to_s16_signed = OpSConvert %short %u32 + OpStore %ptr_s16 %u32_to_s16_signed + %u32_to_u16_signed = OpSConvert %ushort %u32 + OpStore %ptr_u16 %u32_to_u16_signed + + ;%s32_to_s16_unsigned = OpUConvert %short %s32 + ;OpStore %ptr_s16 %s32_to_s16_unsigned + %s32_to_u16_unsigned = OpUConvert %ushort %s32 + OpStore %ptr_u16 %s32_to_u16_unsigned + + ;%u32_to_s16_unsigned = OpUConvert %short %u32 + ;OpStore %ptr_s16 %u32_to_s16_unsigned + %u32_to_u16_unsigned = OpUConvert %ushort %u32 + OpStore %ptr_u16 %u32_to_u16_unsigned + + ; SToF + %s16_to_f32_signed = OpConvertSToF %float %s16 + OpStore %ptr_f32 %s16_to_f32_signed + %u16_to_f32_signed = OpConvertSToF %float %u16 + OpStore %ptr_f32 %u16_to_f32_signed + %s16_to_f32_unsigned = OpConvertUToF %float %s16 + OpStore %ptr_f32 %s16_to_f32_unsigned + %u16_to_f32_unsigned = OpConvertUToF %float %u16 + OpStore %ptr_f32 %u16_to_f32_unsigned + + ; FToS + %f32_to_s16_signed = OpConvertFToS %short %f32 + OpStore %ptr_s16 %f32_to_s16_signed + %f32_to_u16_signed = OpConvertFToS %ushort %f32 + OpStore %ptr_u16 %f32_to_u16_signed + + ; FToU + %f32_to_u16_unsigned = OpConvertFToU %ushort %f32 + OpStore %ptr_u16 %f32_to_u16_unsigned + ; Result must be unsigned for FToU, so don't bother testing that. 
+ + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp new file mode 100644 index 000000000..bc465285e --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp @@ -0,0 +1,47 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %a "a" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "b" + OpMemberName %SSBO 1 "c" + OpName %_ "" + OpDecorate %_arr_float_uint_5 ArrayStride 16 + OpDecorate %SSBO BufferBlock + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 80 + OpDecorate %_ Binding 0 + OpDecorate %_ DescriptorSet 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_5 = OpConstant %uint 5 +%_arr_float_uint_5 = OpTypeArray %float %uint_5 +%_ptr_Function__arr_float_uint_5 = OpTypePointer Function %_arr_float_uint_5 +%_ptr_Uniform__arr_float_uint_5 = OpTypePointer Uniform %_arr_float_uint_5 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Uniform_float = OpTypePointer Uniform %float + %SSBO = OpTypeStruct %_arr_float_uint_5 %_arr_float_uint_5 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function__arr_float_uint_5 Function + %ptr_b = OpAccessChain %_ptr_Uniform__arr_float_uint_5 %_ %int_0 + %ptr_c = OpAccessChain %_ptr_Uniform__arr_float_uint_5 %_ %int_1 + %loaded_b = OpLoad %_arr_float_uint_5 %ptr_b + OpStore %a %loaded_b + OpCopyMemory %a %ptr_b + %loaded_a = OpLoad %_arr_float_uint_5 %a + OpStore %ptr_b %loaded_a + 
OpCopyMemory %ptr_c %a + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp b/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp new file mode 100644 index 000000000..3f2d141a1 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/atomic-load-store.asm.comp @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 23 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %c "c" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %SSBO = OpTypeStruct %uint %uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %c = OpVariable %_ptr_Function_uint Function + %15 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 + %16 = OpAtomicLoad %uint %15 %int_1 %int_0 + OpStore %c %16 + %18 = OpLoad %uint %c + %19 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + OpAtomicStore %19 %int_1 %int_0 %18 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp b/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp new file mode 100644 index 
000000000..832a27354 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/atomic-min-max-sign.asm.comp @@ -0,0 +1,56 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 30 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %int = OpTypeInt 32 1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%uint_4294967295 = OpConstant %uint 4294967295 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int + %int_n3 = OpConstant %int -3 + %int_4 = OpConstant %int 4 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %13 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %18 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %22 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %25 = OpAccessChain %_ptr_Uniform_int %_ %int_1 + %30 = OpAtomicUMax %uint %13 %uint_1 %uint_0 %uint_1 + %31 = OpAtomicSMin %uint %13 %uint_1 %uint_0 %uint_1 + %32 = OpAtomicUMin %uint %18 %uint_1 %uint_0 %uint_4294967295 + %33 = OpAtomicSMax %uint %18 %uint_1 %uint_0 %uint_4294967295 + %34 = OpAtomicSMax %int %22 %uint_1 %uint_0 %int_n3 + %35 = OpAtomicUMin %int %22 %uint_1 %uint_0 %int_n3 + 
%36 = OpAtomicSMin %int %25 %uint_1 %uint_0 %int_4 + %37 = OpAtomicUMax %int %25 %uint_1 %uint_0 %int_4 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp new file mode 100644 index 000000000..a32384159 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/atomic-result-temporary.asm.comp @@ -0,0 +1,59 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "count" + OpMemberName %SSBO 1 "data" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO = OpTypeStruct %uint %_runtimearr_uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 + %uint_1024 = OpConstant %uint 1024 + %bool = OpTypeBool + %int_1 = OpConstant %int 1 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Input_uint = OpTypePointer Input %uint + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 + %19 = 
OpAtomicIAdd %uint %16 %uint_1 %uint_0 %uint_1 + %23 = OpULessThan %bool %19 %uint_1024 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %32 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %33 = OpLoad %uint %32 + %34 = OpAccessChain %_ptr_Uniform_uint %_ %int_1 %19 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp b/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp new file mode 100644 index 000000000..034e5ae0c --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpDecorate %Foo Block + OpMemberDecorate %Foo 0 Offset 0 + OpMemberDecorate %Foo 1 Offset 8 + OpDecorate %foo Aliased + OpDecorate %vp Aliased + OpDecorate %ppp Aliased + OpDecorate %p2 Aliased + OpDecorate %dummyarg1 Restrict + OpDecorate %dummyarg3 Restrict + OpDecorate %dummyarg4 Restrict + OpDecorate %dummyarg5 Restrict + + OpDecorate %Registers Block + OpMemberDecorate %Registers 0 Offset 0 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_Foo PhysicalStorageBuffer + %int = OpTypeInt 32 1 +%_ptr_PhysicalStorageBuffer_int = OpTypePointer PhysicalStorageBuffer %int +%_ptr_PhysicalStorageBuffer_int_int = OpTypePointer PhysicalStorageBuffer %_ptr_PhysicalStorageBuffer_int + %Foo = OpTypeStruct %int %_ptr_PhysicalStorageBuffer_int +%_ptr_PhysicalStorageBuffer_Foo = OpTypePointer PhysicalStorageBuffer %Foo +%_ptr_Function__ptr_PhysicalStorageBuffer_Foo = OpTypePointer Function 
%_ptr_PhysicalStorageBuffer_Foo + %11 = OpTypeFunction %void %_ptr_PhysicalStorageBuffer_Foo %int %_ptr_PhysicalStorageBuffer_int %_ptr_PhysicalStorageBuffer_int_int %_ptr_PhysicalStorageBuffer_int + %int_0 = OpConstant %int 0 + %int_1 = OpConstant %int 1 + %Registers = OpTypeStruct %_ptr_PhysicalStorageBuffer_Foo +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %_ = OpVariable %_ptr_PushConstant_Registers PushConstant + %int_40 = OpConstant %int 40 +%_ptr_PushConstant__ptr_PhysicalStorageBuffer_Foo = OpTypePointer PushConstant %_ptr_PhysicalStorageBuffer_Foo + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %27 = OpAccessChain %_ptr_PushConstant__ptr_PhysicalStorageBuffer_Foo %_ %int_0 + %28 = OpLoad %_ptr_PhysicalStorageBuffer_Foo %27 + %29 = OpAccessChain %_ptr_PhysicalStorageBuffer_int %28 %int_0 + %pp = OpAccessChain %_ptr_PhysicalStorageBuffer_int_int %28 %int_1 + %pp_loaded = OpLoad %_ptr_PhysicalStorageBuffer_int %pp Aligned 8 + %30 = OpFunctionCall %void %func_1_i1_ %28 %int_40 %29 %pp %pp_loaded + OpReturn + OpFunctionEnd + %func_1_i1_ = OpFunction %void None %11 + %foo = OpFunctionParameter %_ptr_PhysicalStorageBuffer_Foo + %v = OpFunctionParameter %int + %vp = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int + %ppp = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int_int + %p2 = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int + %15 = OpLabel + %dummy_call = OpFunctionCall %void %func_dummy %foo %v %vp %ppp %p2 + %20 = OpAccessChain %_ptr_PhysicalStorageBuffer_int %foo %int_0 + OpStore %20 %v Aligned 16 + OpStore %vp %v Aligned 4 + OpStore %p2 %v Aligned 4 + OpStore %ppp %p2 Aligned 8 + OpReturn + OpFunctionEnd + +%func_dummy = OpFunction %void None %11 + %dummyarg1 = OpFunctionParameter %_ptr_PhysicalStorageBuffer_Foo + %dummyarg2 = OpFunctionParameter %int + 
%dummyarg3 = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int + %dummyarg4 = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int_int + %dummyarg5 = OpFunctionParameter %_ptr_PhysicalStorageBuffer_int + %dummylabel = OpLabel + OpReturn + OpFunctionEnd + diff --git a/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp b/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp new file mode 100644 index 000000000..0cca78f6a --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/bda-to-array-in-buffer.invalid.asm.spv16.comp @@ -0,0 +1,71 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos; 35 +; Bound: 5550 +; Schema: 0 + OpCapability Int8 + OpCapability Int64 + OpCapability Int64 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + OpMemoryModel PhysicalStorageBuffer64 Simple + OpEntryPoint GLCompute %main "main" %globals + OpExecutionMode %main LocalSize 16 16 1 + + OpDecorate %ptr_uchar ArrayStride 8 + OpDecorate %ptr_uint ArrayStride 8 + OpDecorate %ptr_array_t ArrayStride 8 + OpDecorate %array_t ArrayStride 1 + OpDecorate %struct_t Block + OpMemberDecorate %struct_t 0 Offset 0 + OpMemberDecorate %struct_t 1 Offset 8 + OpDecorate %ptr_struct ArrayStride 32 + OpDecorate %globals DescriptorSet 0 + OpDecorate %globals Binding 0 + + %void = OpTypeVoid + %uchar = OpTypeInt 8 0 + %uint = OpTypeInt 32 0 + %ulong = OpTypeInt 64 0 + %bool = OpTypeBool + + %ulong_12 = OpConstant %ulong 12 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uchar_69 = OpConstant %uchar 69 + %ulong_16 = OpConstant %ulong 16 + + %ptr_uint = OpTypePointer PhysicalStorageBuffer %uint + %ptr_uchar = OpTypePointer PhysicalStorageBuffer %uchar + + %array_t = OpTypeArray %uchar %ulong_12 +%ptr_array_t = OpTypePointer PhysicalStorageBuffer %array_t + + %struct_t = OpTypeStruct %ptr_uchar %ptr_array_t + %ptr_struct = OpTypePointer StorageBuffer 
%struct_t + + %void_fn = OpTypeFunction %void + %foo_t = OpTypeFunction %ptr_uint + +%ptr_uchararr_sb = OpTypePointer StorageBuffer %ptr_array_t + + %globals = OpVariable %ptr_struct StorageBuffer + + %foo = OpFunction %ptr_uint None %foo_t + %foo_entry = OpLabel + %lea2 = OpAccessChain %ptr_uchararr_sb %globals %uint_1 + %loaded2 = OpLoad %ptr_array_t %lea2 + %cast = OpConvertPtrToU %ulong %loaded2 + %adjusted = OpIAdd %ulong %cast %ulong_16 + %cast2 = OpConvertUToPtr %ptr_uint %adjusted + OpStore %cast2 %uint_1 Aligned 4 ; eliminating this store generates different code and the problem disappears + OpReturnValue %cast2 + OpFunctionEnd + + %main = OpFunction %void None %void_fn + %main_entry = OpLabel + %nothing = OpFunctionCall %ptr_uint %foo + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp new file mode 100644 index 000000000..3651a4de5 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 33 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpExtension "SPV_KHR_16bit_storage" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_shader_explicit_arithmetic_types" + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpMemberName %SSBO 2 "c" + OpMemberName %SSBO 3 "d" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpMemberDecorate %SSBO 2 Offset 8 + OpMemberDecorate %SSBO 3 Offset 12 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_WorkGroupSize BuiltIn 
WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %half = OpTypeFloat 16 + %v2half = OpTypeVector %half 2 + %float = OpTypeFloat 32 + %SSBO = OpTypeStruct %v2half %float %float %v2half +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v2half = OpTypePointer Uniform %v2half + %uint = OpTypeInt 32 0 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %int_3 = OpConstant %int 3 + %int_2 = OpConstant %int 2 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %16 = OpAccessChain %_ptr_Uniform_v2half %_ %int_0 + %17 = OpLoad %v2half %16 + %20 = OpBitcast %float %17 + %22 = OpAccessChain %_ptr_Uniform_float %_ %int_1 + OpStore %22 %20 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_2 + %26 = OpLoad %float %25 + %28 = OpBitcast %v2half %26 + %29 = OpAccessChain %_ptr_Uniform_v2half %_ %int_3 + OpStore %29 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp b/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp new file mode 100644 index 000000000..435fa3222 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/bitfield-signed-operations.asm.comp @@ -0,0 +1,97 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid 
+ %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + + %int_1 = OpConstant %int 1 + %uint_11 = OpConstant %uint 11 + + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %ints_alt = OpVectorShuffle %v4int %ints %ints 3 2 1 0 + %uints_alt = OpVectorShuffle %v4uint %uints %uints 3 2 1 0 + + %int_to_int_popcount = OpBitCount %v4int %ints + %int_to_uint_popcount = OpBitCount %v4uint %ints + %uint_to_int_popcount = OpBitCount %v4int %uints + %uint_to_uint_popcount = OpBitCount %v4uint %uints + + ; BitReverse must have matching types w.r.t. sign, yay. + %int_to_int_reverse = OpBitReverse %v4int %ints + ;%int_to_uint_reverse = OpBitReverse %v4uint %ints + ;%uint_to_int_reverse = OpBitReverse %v4int %uints + %uint_to_uint_reverse = OpBitReverse %v4uint %uints + + ; Base and Result must match. + %int_to_int_sbit = OpBitFieldSExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_sbit = OpBitFieldSExtract %v4uint %ints %offset %count + ;%uint_to_int_sbit = OpBitFieldSExtract %v4int %uints %offset %count + %uint_to_uint_sbit = OpBitFieldSExtract %v4uint %uints %uint_11 %int_1 + + ; Base and Result must match. 
+ %int_to_int_ubit = OpBitFieldUExtract %v4int %ints %int_1 %uint_11 + ;%int_to_uint_ubit = OpBitFieldUExtract %v4uint %ints %offset %count + ;%uint_to_int_ubit = OpBitFieldUExtract %v4int %uints %offset %count + %uint_to_uint_ubit = OpBitFieldUExtract %v4uint %uints %uint_11 %int_1 + + %int_to_int_insert = OpBitFieldInsert %v4int %ints %ints_alt %int_1 %uint_11 + %uint_to_uint_insert = OpBitFieldInsert %v4uint %uints %uints_alt %uint_11 %int_1 + + OpStore %ints_ptr %int_to_int_popcount + OpStore %uints_ptr %int_to_uint_popcount + OpStore %ints_ptr %uint_to_int_popcount + OpStore %uints_ptr %uint_to_uint_popcount + + OpStore %ints_ptr %int_to_int_reverse + ;OpStore %uints_ptr %int_to_uint_reverse + ;OpStore %ints_ptr %uint_to_int_reverse + OpStore %uints_ptr %uint_to_uint_reverse + + OpStore %ints_ptr %int_to_int_sbit + ;OpStore %uints_ptr %int_to_uint_sbit + ;OpStore %ints_ptr %uint_to_int_sbit + OpStore %uints_ptr %uint_to_uint_sbit + + OpStore %ints_ptr %int_to_int_ubit + ;OpStore %uints_ptr %int_to_uint_ubit + ;OpStore %ints_ptr %uint_to_int_ubit + OpStore %uints_ptr %uint_to_uint_ubit + + OpStore %ints_ptr %int_to_int_insert + OpStore %uints_ptr %uint_to_uint_insert + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp b/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp new file mode 100644 index 000000000..e3b785cd5 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/bitscan.asm.comp @@ -0,0 +1,72 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "u" + OpMemberName %SSBO 1 "i" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + 
OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %ivec4 = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %uvec4 = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %uvec4 %ivec4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_uvec4 = OpTypePointer Uniform %uvec4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_ivec4 = OpTypePointer Uniform %ivec4 + %main = OpFunction %void None %3 + %5 = OpLabel + %uptr = OpAccessChain %_ptr_Uniform_uvec4 %_ %int_0 + %iptr = OpAccessChain %_ptr_Uniform_ivec4 %_ %int_1 + %uvalue = OpLoad %uvec4 %uptr + %ivalue = OpLoad %ivec4 %iptr + + %lsb_uint_to_uint = OpExtInst %uvec4 %1 FindILsb %uvalue + %lsb_uint_to_int = OpExtInst %ivec4 %1 FindILsb %uvalue + %lsb_int_to_uint = OpExtInst %uvec4 %1 FindILsb %ivalue + %lsb_int_to_int = OpExtInst %ivec4 %1 FindILsb %ivalue + + %umsb_uint_to_uint = OpExtInst %uvec4 %1 FindUMsb %uvalue + %umsb_uint_to_int = OpExtInst %ivec4 %1 FindUMsb %uvalue + %umsb_int_to_uint = OpExtInst %uvec4 %1 FindUMsb %ivalue + %umsb_int_to_int = OpExtInst %ivec4 %1 FindUMsb %ivalue + + %smsb_uint_to_uint = OpExtInst %uvec4 %1 FindSMsb %uvalue + %smsb_uint_to_int = OpExtInst %ivec4 %1 FindSMsb %uvalue + %smsb_int_to_uint = OpExtInst %uvec4 %1 FindSMsb %ivalue + %smsb_int_to_int = OpExtInst %ivec4 %1 FindSMsb %ivalue + + OpStore %uptr %lsb_uint_to_uint + OpStore %iptr %lsb_uint_to_int + OpStore %uptr %lsb_int_to_uint + OpStore %iptr %lsb_int_to_int + + OpStore %uptr %umsb_uint_to_uint + OpStore %iptr %umsb_uint_to_int + OpStore %uptr %umsb_int_to_uint + OpStore %iptr %umsb_int_to_int + + OpStore %uptr %smsb_uint_to_uint + OpStore %iptr %smsb_uint_to_int + OpStore %uptr %smsb_int_to_uint + OpStore %iptr %smsb_int_to_int + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp 
b/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp new file mode 100644 index 000000000..37ff035fa --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct-2.asm.comp @@ -0,0 +1,85 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 40 +; Schema: 0 + OpCapability Shader + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %csMainClear "main" %gl_GlobalInvocationID + OpExecutionMode %csMainClear LocalSize 64 1 1 + OpSource HLSL 600 + OpName %type_CommonConstants "type.CommonConstants" + OpMemberName %type_CommonConstants 0 "g_count" + OpMemberName %type_CommonConstants 1 "g_padding4" + OpName %CommonConstants "CommonConstants" + OpName %type_RWStructuredBuffer_MyStruct "type.RWStructuredBuffer.MyStruct" + OpName %MyStruct "MyStruct" + OpMemberName %MyStruct 0 "m_coefficients" + OpName %g_data "g_data" + OpName %csMainClear "csMainClear" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID" + OpDecorate %CommonConstants DescriptorSet 0 + OpDecorate %CommonConstants Binding 0 + OpDecorate %g_data DescriptorSet 0 + OpDecorate %g_data Binding 1 + OpMemberDecorate %type_CommonConstants 0 Offset 0 + OpMemberDecorate %type_CommonConstants 1 Offset 4 + OpDecorate %type_CommonConstants Block + OpDecorateString %CommonConstants UserTypeGOOGLE "cbuffer" + OpDecorate %_arr_v4float_uint_4 ArrayStride 16 + OpMemberDecorate %MyStruct 0 Offset 0 + OpDecorate %_runtimearr_MyStruct ArrayStride 64 + OpMemberDecorate %type_RWStructuredBuffer_MyStruct 0 Offset 0 + OpDecorate %type_RWStructuredBuffer_MyStruct BufferBlock + OpDecorateString %g_data UserTypeGOOGLE "rwstructuredbuffer" + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 
4 + %v3uint = OpTypeVector %uint 3 +%type_CommonConstants = OpTypeStruct %uint %v3uint +%_ptr_Uniform_type_CommonConstants = OpTypePointer Uniform %type_CommonConstants +%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4 + %MyStruct = OpTypeStruct %_arr_v4float_uint_4 +%_runtimearr_MyStruct = OpTypeRuntimeArray %MyStruct +%type_RWStructuredBuffer_MyStruct = OpTypeStruct %_runtimearr_MyStruct +%_ptr_Uniform_type_RWStructuredBuffer_MyStruct = OpTypePointer Uniform %type_RWStructuredBuffer_MyStruct +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %21 = OpTypeFunction %void +%_ptr_Uniform_uint = OpTypePointer Uniform %uint + %bool = OpTypeBool +%_ptr_Uniform_MyStruct = OpTypePointer Uniform %MyStruct +%CommonConstants = OpVariable %_ptr_Uniform_type_CommonConstants Uniform + %g_data = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_MyStruct Uniform +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %26 = OpConstantNull %v4float + %27 = OpConstantComposite %_arr_v4float_uint_4 %26 %26 %26 %26 + %28 = OpConstantComposite %MyStruct %27 +%csMainClear = OpFunction %void None %21 + %29 = OpLabel + %30 = OpLoad %v3uint %gl_GlobalInvocationID + OpSelectionMerge %31 None + OpSwitch %uint_0 %32 + %32 = OpLabel + %33 = OpCompositeExtract %uint %30 0 + %34 = OpAccessChain %_ptr_Uniform_uint %CommonConstants %int_0 + %35 = OpLoad %uint %34 + %36 = OpUGreaterThanEqual %bool %33 %35 + OpSelectionMerge %37 DontFlatten + OpBranchConditional %36 %38 %37 + %38 = OpLabel + OpBranch %31 + %37 = OpLabel + %39 = OpAccessChain %_ptr_Uniform_MyStruct %g_data %int_0 %33 + OpStore %39 %28 + OpBranch %31 + %31 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp new file mode 100644 index 000000000..8aaa9500a --- /dev/null +++ 
b/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp @@ -0,0 +1,80 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 32 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpName %foo2 "foo2" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpMemberName %SSBO 1 "b" + OpName %_ "" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %_arr_float_uint_4 ArrayStride 4 + OpDecorate %struct_arr ArrayStride 32 + OpMemberDecorate %struct 0 Offset 0 + OpMemberDecorate %struct 1 Offset 16 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_2 = OpConstant %uint 2 + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 +%_ptr_Private__arr_float_uint_4 = OpTypePointer Private %_arr_float_uint_4 + %foo = OpVariable %_ptr_Private__arr_float_uint_4 Private + %foo2 = OpVariable %_ptr_Private__arr_float_uint_4 Private + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_1 = OpConstant %float 1 + %struct = OpTypeStruct %_arr_float_uint_4 %_arr_float_uint_4 + %struct_arr = OpTypeArray %struct %uint_2 + %ptr_struct = OpTypePointer Function %struct +%_ptr_Private_float = OpTypePointer Private %float + %int_1 = OpConstant %int 1 + %float_2 = OpConstant %float 2 + %int_2 = OpConstant %int 2 + %float_3 = OpConstant %float 3 + %int_3 = OpConstant %int 3 + %float_4 = OpConstant %float 4 + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %carr = OpConstantComposite %_arr_float_uint_4 
%float_1 %float_2 %float_3 %float_4 + %struct_constant_0 = OpConstantComposite %struct %carr %carr + %struct_constant_1 = OpConstantComposite %struct %carr %carr + %struct_arr_constant = OpConstantComposite %struct_arr %struct_constant_0 %struct_constant_1 + %SSBO = OpTypeStruct %uint %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %struct_var = OpVariable %ptr_struct Function + %16 = OpAccessChain %_ptr_Private_float %foo %int_0 + OpStore %16 %float_1 + OpStore %foo %carr + %19 = OpAccessChain %_ptr_Private_float %foo %int_1 + OpStore %19 %float_2 + %22 = OpAccessChain %_ptr_Private_float %foo %int_2 + OpStore %22 %float_3 + %25 = OpAccessChain %_ptr_Private_float %foo %int_3 + OpStore %25 %float_4 + OpCopyMemory %foo2 %foo + %l0 = OpLoad %_arr_float_uint_4 %foo + %l1 = OpLoad %_arr_float_uint_4 %foo2 + %struct0 = OpCompositeConstruct %struct %l0 %l1 + OpStore %struct_var %struct0 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp b/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp new file mode 100644 index 000000000..ed8d0ba6f --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/buffer-device-address-ptr-casting.asm.comp @@ -0,0 +1,106 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 62 +; Schema: 0 + OpCapability Shader + OpCapability Int64 + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_ARB_gpu_shader_int64" + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpName %main "main" + OpName %SomeBuffer "SomeBuffer" + OpMemberName 
%SomeBuffer 0 "v" + OpMemberName %SomeBuffer 1 "a" + OpMemberName %SomeBuffer 2 "b" + OpName %Registers "Registers" + OpMemberName %Registers 0 "address" + OpMemberName %Registers 1 "address2" + OpName %registers "registers" + OpName %a "a" + OpName %b "b" + OpMemberDecorate %SomeBuffer 0 Offset 0 + OpMemberDecorate %SomeBuffer 1 Offset 16 + OpMemberDecorate %SomeBuffer 2 Offset 24 + OpDecorate %SomeBuffer Block + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpDecorate %Registers Block + %void = OpTypeVoid + %3 = OpTypeFunction %void + OpTypeForwardPointer %_ptr_PhysicalStorageBuffer_SomeBuffer PhysicalStorageBuffer + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %ulong = OpTypeInt 64 0 + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %SomeBuffer = OpTypeStruct %v4float %ulong %v2uint +%_ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer PhysicalStorageBuffer %SomeBuffer +%_ptr_Function__ptr_PhysicalStorageBuffer_SomeBuffer = OpTypePointer Function %_ptr_PhysicalStorageBuffer_SomeBuffer + %Registers = OpTypeStruct %ulong %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %registers = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_PushConstant_ulong = OpTypePointer PushConstant %ulong + %int_1 = OpConstant %int 1 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %float_1 = OpConstant %float 1 + %float_2 = OpConstant %float 2 + %float_3 = OpConstant %float 3 + %float_4 = OpConstant %float 4 + %35 = OpConstantComposite %v4float %float_1 %float_2 %float_3 %float_4 +%_ptr_PhysicalStorageBuffer_v4float = OpTypePointer PhysicalStorageBuffer %v4float + %float_5 = OpConstant %float 5 + %float_6 = OpConstant %float 6 + %float_7 = OpConstant %float 7 + %float_8 = OpConstant %float 8 + %43 = OpConstantComposite %v4float %float_5 %float_6 %float_7 %float_8 +%_ptr_Function_ulong = OpTypePointer Function 
%ulong +%_ptr_Function_v2uint = OpTypePointer Function %v2uint +%_ptr_PhysicalStorageBuffer_ulong = OpTypePointer PhysicalStorageBuffer %ulong + %int_2 = OpConstant %int 2 +%_ptr_PhysicalStorageBuffer_v2uint = OpTypePointer PhysicalStorageBuffer %v2uint + %main = OpFunction %void None %3 + %5 = OpLabel + %a = OpVariable %_ptr_Function_ulong Function + %b = OpVariable %_ptr_Function_v2uint Function + %21 = OpAccessChain %_ptr_PushConstant_ulong %registers %int_0 + %27 = OpAccessChain %_ptr_PushConstant_v2uint %registers %int_1 + %uint_ptr0 = OpLoad %ulong %21 + %uint_ptr1 = OpLoad %v2uint %27 + + ; ConvertUToPtr and vice versa do not accept vectors. + %ulong_ptr0 = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %ulong_ptr1 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr0 + %uvec2_ptr0 = OpBitcast %_ptr_PhysicalStorageBuffer_SomeBuffer %uint_ptr1 + + %vec4_write0 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr0 %int_0 + %vec4_write1 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %ulong_ptr1 %int_0 + %vec4_write2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v4float %uvec2_ptr0 %int_0 + + OpStore %vec4_write0 %35 Aligned 16 + OpStore %vec4_write1 %35 Aligned 16 + OpStore %vec4_write2 %35 Aligned 16 + + %ulong_from_ptr0 = OpConvertPtrToU %ulong %ulong_ptr0 + %ulong_from_ptr1 = OpBitcast %ulong %ulong_ptr1 + %uvec2_from_ptr0 = OpBitcast %v2uint %uvec2_ptr0 + + %ptr0 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr0 %int_1 + %ptr1 = OpAccessChain %_ptr_PhysicalStorageBuffer_ulong %ulong_ptr1 %int_1 + %ptr2 = OpAccessChain %_ptr_PhysicalStorageBuffer_v2uint %uvec2_ptr0 %int_2 + + OpStore %ptr0 %ulong_from_ptr0 Aligned 8 + OpStore %ptr1 %ulong_from_ptr1 Aligned 8 + OpStore %ptr2 %uvec2_from_ptr0 Aligned 8 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp 
new file mode 100644 index 000000000..c7b76a8c0 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/composite-construct-buffer-struct.asm.invalid.comp @@ -0,0 +1,54 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 + %Block_ptr = OpTypePointer StorageBuffer %Block +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. 
+ %ptr = OpAccessChain %Block_ptr %SSBO_Var %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + %constructed = OpCompositeConstruct %Block %loaded_array %loaded_array + OpStore %ptr %constructed + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp b/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp new file mode 100644 index 000000000..8997d0aaf --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/constant-composite-undef.asm.comp @@ -0,0 +1,40 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 20 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Block "Block" + OpMemberName %Block 0 "f" + OpName %block "block" + OpMemberDecorate %Block 0 Offset 0 + OpDecorate %Block BufferBlock + OpDecorate %block DescriptorSet 0 + OpDecorate %block Binding 0 + %void = OpTypeVoid + %6 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %Block = OpTypeStruct %v4float +%_ptr_Uniform_Block = OpTypePointer Uniform %Block + %block = OpVariable %_ptr_Uniform_Block Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%float_0_100000001 = OpConstant %float 0.100000001 +%float_0_200000003 = OpConstant %float 0.200000003 +%float_0_300000012 = OpConstant %float 0.300000012 + %15 = OpUndef %float + %16 = OpConstantComposite %v4float %float_0_100000001 %float_0_200000003 %float_0_300000012 %15 +%_ptr_Uniform_v4float = OpTypePointer Uniform %v4float + %main = OpFunction %void None %6 + %18 = OpLabel + %19 = OpAccessChain %_ptr_Uniform_v4float %block %int_0 + OpStore %19 %16 + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp b/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp new file mode 100644 index 000000000..e1dcb0ef8 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/constant-lut-name-aliasing.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 49 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %gl_LocalInvocationID + OpExecutionMode %main LocalSize 4 4 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %gl_LocalInvocationID "gl_LocalInvocationID" + OpName %indexable "indexable" + OpName %indexable_0 "indexable" + OpName %25 "indexable" + OpName %38 "indexable" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_4 = OpConstant %uint 4 +%_arr_int_uint_4 = OpTypeArray %int %uint_4 + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 + %int_3 = OpConstant %int 3 + %25 = 
OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_2 %int_3 +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4 +%_ptr_Function_int = OpTypePointer Function %int + %int_4 = OpConstant %int 4 + %int_5 = OpConstant %int 5 + %int_6 = OpConstant %int 6 + %int_7 = OpConstant %int 7 + %38 = OpConstantComposite %_arr_int_uint_4 %int_4 %int_5 %int_6 %int_7 + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_int = OpTypePointer Uniform %int +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_4 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %indexable = OpVariable %_ptr_Function__arr_int_uint_4 Function +%indexable_0 = OpVariable %_ptr_Function__arr_int_uint_4 Function + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %27 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %28 = OpLoad %uint %27 + OpStore %indexable %25 + %32 = OpAccessChain %_ptr_Function_int %indexable %28 + %33 = OpLoad %int %32 + %40 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %41 = OpLoad %uint %40 + OpStore %indexable_0 %38 + %43 = OpAccessChain %_ptr_Function_int %indexable_0 %41 + %44 = OpLoad %int %43 + %45 = OpIAdd %int %33 %44 + %47 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %47 %45 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp new file mode 100644 index 000000000..6a7065a6f --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/copy-logical-2.spv14.asm.comp @@ -0,0 +1,81 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" 
+ OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %A 3 Offset 96 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %C 3 Offset 96 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 112 + OpMemberDecorate %A0 0 Offset 0 + OpMemberDecorate %C0 0 Offset 0 + OpMemberDecorate %A0 0 RowMajor + OpMemberDecorate %A0 0 MatrixStride 8 + OpMemberDecorate %C0 0 ColMajor + OpMemberDecorate %C0 0 MatrixStride 16 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %v2float = OpTypeVector %float 2 + %m2float = OpTypeMatrix %v2float 2 + %A0 = OpTypeStruct %m2float + %C0 = OpTypeStruct %m2float + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 %B2Array %C0 + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array %A0 + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void 
None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp new file mode 100644 index 000000000..026bd1131 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/copy-logical-offset-and-array-stride-diffs.spv14.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 24 +; Schema: 0 + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" %3 %4 + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %5 Block + OpMemberDecorate %5 0 Offset 0 + OpMemberDecorate %5 1 Offset 16 + OpMemberDecorate %5 2 Offset 48 + OpMemberDecorate %5 3 Offset 64 + OpMemberDecorate %5 4 Offset 80 + OpMemberDecorate %5 5 Offset 96 + OpMemberDecorate %5 6 Offset 112 + OpDecorate %6 Block + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 1 Offset 4 + OpMemberDecorate %6 2 Offset 12 + OpMemberDecorate %6 3 Offset 16 + OpMemberDecorate %6 4 Offset 32 + OpMemberDecorate %6 5 Offset 48 + OpMemberDecorate %6 6 Offset 64 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 0 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 1 + OpDecorate %7 ArrayStride 4 + OpDecorate %8 ArrayStride 16 + OpMemberDecorate %9 0 Offset 4 + OpMemberDecorate %10 0 Offset 8 + %11 = OpTypeVoid + %12 = OpTypeFloat 32 + %13 = OpTypeVector %12 2 + %14 = OpTypeVector %12 3 + %15 = OpTypeVector %12 4 + %16 = OpTypeMatrix %15 4 + %17 = OpTypeInt 32 0 + %18 = OpConstant %17 2 + %7 = OpTypeArray %17 %18 + %8 = OpTypeArray %17 %18 + %9 = OpTypeStruct %17 + %10 = OpTypeStruct %17 + %5 = OpTypeStruct %17 %8 %17 %9 %15 %14 %13 + %19 = OpTypePointer StorageBuffer %5 + %6 = OpTypeStruct %17 %7 %17 
%10 %15 %14 %13 + %20 = OpTypePointer StorageBuffer %6 + %3 = OpVariable %20 StorageBuffer + %4 = OpVariable %19 StorageBuffer + %21 = OpTypeFunction %11 + %2 = OpFunction %11 None %21 + %1 = OpLabel + %22 = OpLoad %6 %3 + %23 = OpCopyLogical %5 %22 + OpStore %4 %23 + OpReturn + OpFunctionEnd + diff --git a/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp new file mode 100644 index 000000000..20fa0b099 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/copy-logical.spv14.asm.comp @@ -0,0 +1,69 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 48 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %ssbo + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %B1 "B1" + OpName %A "A" + OpName %C "C" + OpName %B2 "B2" + OpMemberName %A 0 "a" + OpMemberName %A 1 "b1" + OpMemberName %A 2 "b1_array" + OpMemberName %C 0 "c" + OpMemberName %C 1 "b2" + OpMemberName %C 2 "b2_array" + OpMemberName %B1 0 "elem1" + OpMemberName %B2 0 "elem2" + OpMemberName %SSBO 0 "a_block" + OpMemberName %SSBO 1 "c_block" + OpDecorate %B1Array ArrayStride 16 + OpDecorate %B2Array ArrayStride 16 + OpMemberDecorate %B1 0 Offset 0 + OpMemberDecorate %A 0 Offset 0 + OpMemberDecorate %A 1 Offset 16 + OpMemberDecorate %A 2 Offset 32 + OpMemberDecorate %B2 0 Offset 0 + OpMemberDecorate %C 0 Offset 0 + OpMemberDecorate %C 1 Offset 16 + OpMemberDecorate %C 2 Offset 32 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 96 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_4 = OpConstant %uint 4 + %v4float = OpTypeVector %float 4 + %B2 = OpTypeStruct %v4float + %B2Array = OpTypeArray %B2 %uint_4 + %C = OpTypeStruct %v4float %B2 
%B2Array + %B1 = OpTypeStruct %v4float + %B1Array = OpTypeArray %B1 %uint_4 + %A = OpTypeStruct %v4float %B1 %B1Array + %SSBO = OpTypeStruct %A %C +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_C = OpTypePointer StorageBuffer %C + %int_0 = OpConstant %int 0 +%_ptr_Uniform_A = OpTypePointer StorageBuffer %A + %main = OpFunction %void None %3 + %5 = OpLabel + %22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1 + %39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0 + %23 = OpLoad %C %22 + %24 = OpCopyLogical %A %23 + OpStore %39 %24 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp new file mode 100644 index 000000000..d59aad3ce --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function 
%arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp new file mode 100644 index 000000000..d59aad3ce --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp @@ -0,0 +1,53 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = 
OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; Copy device array to temporary. + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + OpStore %ptr_arr_0 %loaded_array + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp new file mode 100644 index 000000000..d9d0d51c3 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + 
OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 +%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + %loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 
%ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp new file mode 100644 index 000000000..d9d0d51c3 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp @@ -0,0 +1,81 @@ + OpCapability Shader + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpName %Block "Block" + OpName %SSBO "SSBO" + OpName %SSBO_Var "ssbo" + OpName %UBO_Var "ubo" + OpDecorate %SSBO_Var Binding 0 + OpDecorate %SSBO_Var DescriptorSet 0 + OpDecorate %UBO_Var Binding 1 + OpDecorate %UBO_Var DescriptorSet 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %Block 0 Offset 0 + OpMemberDecorate %Block 1 Offset 16 + OpDecorate %BlockArray ArrayStride 32 + OpDecorate %arr_uvec2_2 ArrayStride 8 + OpDecorate %SSBO Block + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uvec2 = OpTypeVector %uint 2 + %arr_uvec2_2 = OpTypeArray %uvec2 %uint_2 + %arr_uvec2_2_ptr = OpTypePointer StorageBuffer %arr_uvec2_2 + %arr_uvec2_2_ptr_const = OpTypePointer Uniform %arr_uvec2_2 + %arr_uvec2_2_ptr_func = OpTypePointer Function %arr_uvec2_2 + %arr_uvec2_2_ptr_workgroup = OpTypePointer Workgroup %arr_uvec2_2 + %wg = OpVariable %arr_uvec2_2_ptr_workgroup Workgroup + %Block = OpTypeStruct %arr_uvec2_2 %arr_uvec2_2 
+%BlockArray = OpTypeArray %Block %uint_3 +%SSBO = OpTypeStruct %BlockArray +%SSBO_Ptr = OpTypePointer StorageBuffer %SSBO +%SSBO_Var = OpVariable %SSBO_Ptr StorageBuffer +%UBO_Ptr = OpTypePointer Uniform %SSBO +%UBO_Var = OpVariable %UBO_Ptr Uniform +%void = OpTypeVoid +%func_type = OpTypeFunction %void + + %main = OpFunction %void None %func_type + %25 = OpLabel + %func = OpVariable %arr_uvec2_2_ptr_func Function + + ; DeviceToDevice + %ptr_arr_0 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_0 + %ptr_arr_1 = OpAccessChain %arr_uvec2_2_ptr %SSBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %ptr_arr_0 %loaded_array + + ; ConstantToDevice + %ptr_arr_1_const = OpAccessChain %arr_uvec2_2_ptr_const %UBO_Var %uint_0 %uint_0 %uint_1 + %loaded_array_const = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %ptr_arr_0 %loaded_array_const + + ; StackToDevice + %loaded_array_func = OpLoad %arr_uvec2_2 %func + OpStore %ptr_arr_0 %loaded_array_func + + ; ThreadGroupToDevice + %loaded_array_workgroup = OpLoad %arr_uvec2_2 %wg + OpStore %ptr_arr_0 %loaded_array_workgroup + + ; DeviceToThreadGroup + %loaded_array_2 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %wg %loaded_array_2 + + ; DeviceToStack + %loaded_array_3 = OpLoad %arr_uvec2_2 %ptr_arr_1 + OpStore %func %loaded_array_3 + + ; ConstantToThreadGroup + %loaded_array_const_2 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %wg %loaded_array_const_2 + + ; ConstantToStack + %loaded_array_const_3 = OpLoad %arr_uvec2_2 %ptr_arr_1_const + OpStore %func %loaded_array_const_3 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp new file mode 100644 index 000000000..73f3ceee1 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/eliminate-globals-not-in-entry-point.noeliminate.spv14.asm.comp @@ -0,0 
+1,59 @@ +; SPIR-V +; Version: 1.5 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + ;OpEntryPoint GLCompute %main "main" %Samp %ubo %ssbo %v %w + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Samp "Samp" + OpName %UBO "UBO" + OpMemberName %UBO 0 "v" + OpName %ubo "ubo" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpName %ssbo "ssbo" + OpName %v "v" + OpName %w "w" + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %Samp DescriptorSet 0 + OpDecorate %Samp Binding 0 + OpMemberDecorate %UBO 0 Offset 0 + OpDecorate %UBO Block + OpDecorate %ubo DescriptorSet 0 + OpDecorate %ubo Binding 1 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %ssbo DescriptorSet 0 + OpDecorate %ssbo Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %float = OpTypeFloat 32 + %12 = OpTypeImage %float 2D 0 0 0 1 Unknown + %13 = OpTypeSampledImage %12 +%_ptr_UniformConstant_13 = OpTypePointer UniformConstant %13 + %Samp = OpVariable %_ptr_UniformConstant_13 UniformConstant + %UBO = OpTypeStruct %float +%_ptr_Uniform_UBO = OpTypePointer Uniform %UBO + %ubo = OpVariable %_ptr_Uniform_UBO Uniform + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %ssbo = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer +%_ptr_Private_float = OpTypePointer Private %float + %v = OpVariable %_ptr_Private_float Private +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %w = OpVariable %_ptr_Workgroup_float Workgroup + %main = OpFunction %void None %3 + %5 = OpLabel + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp b/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp new file mode 100644 index 000000000..7da9f95b9 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/glsl-signed-operations.asm.comp @@ -0,0 +1,123 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 26 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "ints" + OpMemberName %SSBO 1 "uints" + OpName %_ "" + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %v4int = OpTypeVector %int 4 + %uint = OpTypeInt 32 0 + %v4uint = OpTypeVector %uint 4 + %SSBO = OpTypeStruct %v4int %v4uint +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 +%_ptr_Uniform_v4int = OpTypePointer Uniform %v4int + %int_1 = OpConstant %int 1 +%_ptr_Uniform_v4uint = OpTypePointer Uniform %v4uint + %main = OpFunction %void None %3 + %5 = OpLabel + %ints_ptr = OpAccessChain %_ptr_Uniform_v4int %_ %int_0 + %uints_ptr = OpAccessChain %_ptr_Uniform_v4uint %_ %int_1 + %ints = OpLoad %v4int %ints_ptr + %uints = OpLoad %v4uint %uints_ptr + + %int_to_int_sabs = OpExtInst %v4int %1 SAbs %ints + %int_to_uint_sabs = OpExtInst %v4uint %1 SAbs %ints + %uint_to_int_sabs = OpExtInst %v4int %1 SAbs %uints + %uint_to_uint_sabs = OpExtInst %v4uint %1 SAbs %uints + + %int_to_int_ssign = OpExtInst %v4int %1 SSign %ints + %int_to_uint_ssign = OpExtInst %v4uint %1 SSign %ints + %uint_to_int_ssign = OpExtInst %v4int %1 SSign %uints + %uint_to_uint_ssign = OpExtInst %v4uint %1 SSign %uints + 
+ %int_to_int_smsb = OpExtInst %v4int %1 FindSMsb %uints + %int_to_uint_smsb = OpExtInst %v4uint %1 FindSMsb %uints + %uint_to_int_umsb = OpExtInst %v4int %1 FindUMsb %ints + %uint_to_uint_umsb = OpExtInst %v4uint %1 FindUMsb %ints + + %int_to_int_smin = OpExtInst %v4int %1 SMin %ints %ints + %int_to_uint_smin = OpExtInst %v4uint %1 SMin %ints %uints + %uint_to_int_smin = OpExtInst %v4int %1 SMin %uints %uints + %uint_to_uint_smin = OpExtInst %v4uint %1 SMin %uints %ints + + %int_to_int_umin = OpExtInst %v4int %1 UMin %ints %uints + %int_to_uint_umin = OpExtInst %v4uint %1 UMin %ints %uints + %uint_to_int_umin = OpExtInst %v4int %1 UMin %uints %ints + %uint_to_uint_umin = OpExtInst %v4uint %1 UMin %uints %ints + + %int_to_int_smax = OpExtInst %v4int %1 SMax %ints %ints + %int_to_uint_smax = OpExtInst %v4uint %1 SMax %ints %ints + %uint_to_int_smax = OpExtInst %v4int %1 SMax %uints %ints + %uint_to_uint_smax = OpExtInst %v4uint %1 SMax %uints %ints + + %int_to_int_umax = OpExtInst %v4int %1 UMax %ints %uints + %int_to_uint_umax = OpExtInst %v4uint %1 UMax %ints %ints + %uint_to_int_umax = OpExtInst %v4int %1 UMax %uints %ints + %uint_to_uint_umax = OpExtInst %v4uint %1 UMax %uints %ints + + %int_to_int_sclamp = OpExtInst %v4int %1 SClamp %uints %uints %uints + %int_to_uint_sclamp = OpExtInst %v4uint %1 SClamp %uints %uints %uints + %uint_to_int_uclamp = OpExtInst %v4int %1 UClamp %ints %ints %ints + %uint_to_uint_uclamp = OpExtInst %v4uint %1 UClamp %ints %ints %ints + + OpStore %ints_ptr %int_to_int_sabs + OpStore %uints_ptr %int_to_uint_sabs + OpStore %ints_ptr %uint_to_int_sabs + OpStore %uints_ptr %uint_to_uint_sabs + + OpStore %ints_ptr %int_to_int_ssign + OpStore %uints_ptr %int_to_uint_ssign + OpStore %ints_ptr %uint_to_int_ssign + OpStore %uints_ptr %uint_to_uint_ssign + + OpStore %ints_ptr %int_to_int_smsb + OpStore %uints_ptr %int_to_uint_smsb + OpStore %ints_ptr %uint_to_int_umsb + OpStore %uints_ptr %uint_to_uint_umsb + + OpStore %ints_ptr 
%int_to_int_smin + OpStore %uints_ptr %int_to_uint_smin + OpStore %ints_ptr %uint_to_int_smin + OpStore %uints_ptr %uint_to_uint_smin + + OpStore %ints_ptr %int_to_int_umin + OpStore %uints_ptr %int_to_uint_umin + OpStore %ints_ptr %uint_to_int_umin + OpStore %uints_ptr %uint_to_uint_umin + + OpStore %ints_ptr %int_to_int_smax + OpStore %uints_ptr %int_to_uint_smax + OpStore %ints_ptr %uint_to_int_smax + OpStore %uints_ptr %uint_to_uint_smax + + OpStore %ints_ptr %int_to_int_umax + OpStore %uints_ptr %int_to_uint_umax + OpStore %ints_ptr %uint_to_int_umax + OpStore %uints_ptr %uint_to_uint_umax + + OpStore %ints_ptr %int_to_int_sclamp + OpStore %uints_ptr %int_to_uint_sclamp + OpStore %ints_ptr %uint_to_int_uclamp + OpStore %uints_ptr %uint_to_uint_uclamp + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp b/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp new file mode 100644 index 000000000..30db11d45 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/glsl.std450.frexp-modf-struct.asm.comp @@ -0,0 +1,55 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %ResTypeMod = OpTypeStruct %float %float +%_ptr_Function_ResTypeMod = OpTypePointer Function %ResTypeMod + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float_20 = OpConstant %float 20 + %int_1 = OpConstant %int 1 +%_ptr_Function_float = OpTypePointer Function %float +%ResTypeFrexp = OpTypeStruct %float %int +%_ptr_Function_ResTypeFrexp = 
OpTypePointer Function %ResTypeFrexp + %float_40 = OpConstant %float 40 +%_ptr_Function_int = OpTypePointer Function %int + %SSBO = OpTypeStruct %float %int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform +%_ptr_Uniform_float = OpTypePointer Uniform %float +%_ptr_Uniform_int = OpTypePointer Uniform %int + %main = OpFunction %void None %3 + %5 = OpLabel + %modres = OpExtInst %ResTypeMod %1 ModfStruct %float_20 + %frexpres = OpExtInst %ResTypeFrexp %1 FrexpStruct %float_40 + + %modres_f = OpCompositeExtract %float %modres 0 + %modres_i = OpCompositeExtract %float %modres 1 + %frexpres_f = OpCompositeExtract %float %frexpres 0 + %frexpres_i = OpCompositeExtract %int %frexpres 1 + + %float_ptr = OpAccessChain %_ptr_Uniform_float %_ %int_0 + %int_ptr = OpAccessChain %_ptr_Uniform_int %_ %int_1 + + OpStore %float_ptr %modres_f + OpStore %float_ptr %modres_i + OpStore %float_ptr %frexpres_f + OpStore %int_ptr %frexpres_i + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp new file mode 100644 index 000000000..b9876122a --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp @@ -0,0 +1,137 @@ +; SPIR-V +; Version: 1.3 +; Generator: Google spiregg; 0 +; Bound: 91 +; Schema: 0 + OpCapability Shader + OpCapability StorageImageReadWithoutFormat + OpExtension "SPV_GOOGLE_hlsl_functionality1" + OpExtension "SPV_GOOGLE_user_type" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %csMain "main" %gl_GlobalInvocationID + OpExecutionMode %csMain LocalSize 8 8 1 + OpSource HLSL 500 + OpName %Data "Data" + OpMemberName %Data 0 "sourceData" + OpName %g_data "g_data" + OpName %type_2d_image "type.2d.image" + OpName %g_inputTexture "g_inputTexture" + OpName %type_2d_image_0 
"type.2d.image" + OpName %g_output "g_output" + OpName %csMain "csMain" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorateString %gl_GlobalInvocationID UserSemantic "SV_DispatchThreadID" + OpDecorate %g_inputTexture DescriptorSet 0 + OpDecorate %g_inputTexture Binding 0 + OpDecorate %g_output DescriptorSet 0 + OpDecorate %g_output Binding 0 + OpDecorateString %g_inputTexture UserTypeGOOGLE "texture2d:" + OpDecorateString %g_output UserTypeGOOGLE "rwtexture2d:" + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_4 = OpConstant %uint 4 + %uint_1 = OpConstant %uint 1 + %int_16 = OpConstant %int 16 + %float = OpTypeFloat 32 + %float_n1 = OpConstant %float -1 + %v3float = OpTypeVector %float 3 + %20 = OpConstantComposite %v3float %float_n1 %float_n1 %float_n1 + %float_0 = OpConstant %float 0 + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %uint_64 = OpConstant %uint 64 + %uint_16 = OpConstant %uint 16 +%_arr_v3float_uint_16 = OpTypeArray %v3float %uint_16 + %Data = OpTypeStruct %_arr_v3float_uint_16 +%_arr_Data_uint_64 = OpTypeArray %Data %uint_64 +%_ptr_Workgroup__arr_Data_uint_64 = OpTypePointer Workgroup %_arr_Data_uint_64 +%type_2d_image = OpTypeImage %float 2D 2 0 0 1 Unknown +%_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +%type_2d_image_0 = OpTypeImage %uint 2D 2 0 0 2 R32ui +%_ptr_UniformConstant_type_2d_image_0 = OpTypePointer UniformConstant %type_2d_image_0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %34 = OpTypeFunction %void + %v2uint = OpTypeVector %uint 2 +%_ptr_Function__arr_v3float_uint_16 = OpTypePointer Function %_arr_v3float_uint_16 +%_ptr_Workgroup__arr_v3float_uint_16 = OpTypePointer Workgroup %_arr_v3float_uint_16 +%_ptr_Function_v3float = OpTypePointer Function %v3float + %bool = OpTypeBool + %v3int = OpTypeVector %int 3 + %v2int = OpTypeVector 
%int 2 + %v4float = OpTypeVector %float 4 +%_ptr_Workgroup_v3float = OpTypePointer Workgroup %v3float + %g_data = OpVariable %_ptr_Workgroup__arr_Data_uint_64 Workgroup +%g_inputTexture = OpVariable %_ptr_UniformConstant_type_2d_image UniformConstant + %g_output = OpVariable %_ptr_UniformConstant_type_2d_image_0 UniformConstant +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %csMain = OpFunction %void None %34 + %44 = OpLabel + %45 = OpVariable %_ptr_Function__arr_v3float_uint_16 Function + %46 = OpLoad %v3uint %gl_GlobalInvocationID + %47 = OpCompositeExtract %uint %46 0 + OpBranch %48 + %48 = OpLabel + %49 = OpPhi %uint %uint_0 %44 %50 %51 + %52 = OpULessThan %bool %49 %uint_4 + OpLoopMerge %53 %51 None + OpBranchConditional %52 %54 %53 + %54 = OpLabel + OpBranch %55 + %55 = OpLabel + %56 = OpPhi %uint %uint_0 %54 %57 %58 + %59 = OpULessThan %bool %56 %uint_4 + OpLoopMerge %60 %58 None + OpBranchConditional %59 %58 %60 + %58 = OpLabel + %61 = OpBitcast %v3int %46 + %62 = OpBitcast %int %56 + %63 = OpBitcast %int %49 + %64 = OpCompositeConstruct %v3int %62 %63 %int_0 + %65 = OpIAdd %v3int %61 %64 + %66 = OpVectorShuffle %v2int %65 %65 0 1 + %67 = OpCompositeExtract %int %65 2 + %68 = OpLoad %type_2d_image %g_inputTexture + %69 = OpImageFetch %v4float %68 %66 Lod %67 + %70 = OpVectorShuffle %v3float %69 %69 0 1 2 + %71 = OpIMul %uint %49 %uint_4 + %72 = OpIAdd %uint %71 %56 + %73 = OpAccessChain %_ptr_Workgroup_v3float %g_data %47 %int_0 %72 + OpStore %73 %70 + %57 = OpIAdd %uint %56 %uint_1 + OpBranch %55 + %60 = OpLabel + OpBranch %51 + %51 = OpLabel + %50 = OpIAdd %uint %49 %uint_1 + OpBranch %48 + %53 = OpLabel + %74 = OpAccessChain %_ptr_Workgroup__arr_v3float_uint_16 %g_data %47 %int_0 + %75 = OpLoad %_arr_v3float_uint_16 %74 + OpStore %45 %75 + OpBranch %76 + %76 = OpLabel + %77 = OpPhi %uint %uint_0 %53 %78 %79 + %80 = OpPhi %int %int_0 %53 %81 %79 + %82 = OpSLessThan %bool %80 %int_16 + OpLoopMerge %83 %79 None + OpBranchConditional %82 %79 
%83 + %79 = OpLabel + %84 = OpAccessChain %_ptr_Function_v3float %45 %80 + %85 = OpLoad %v3float %84 + %86 = OpDot %float %85 %20 + %87 = OpExtInst %float %1 FClamp %86 %float_0 %float_1 + %88 = OpConvertFToU %uint %87 + %78 = OpBitwiseOr %uint %77 %88 + %81 = OpIAdd %int %80 %int_1 + OpBranch %76 + %83 = OpLabel + %89 = OpVectorShuffle %v2uint %46 %46 0 1 + %90 = OpLoad %type_2d_image_0 %g_output + OpImageWrite %90 %89 %77 None + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp new file mode 100644 index 000000000..3817a6152 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/image-atomic-mismatch-sign.asm.invalid.comp @@ -0,0 +1,71 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 45 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource ESSL 310 + OpSourceExtension "GL_OES_shader_image_atomic" + OpName %main "main" + OpName %uImage "uImage" + OpName %uImageArray "uImageArray" + OpName %iImage "iImage" + OpName %iImageArray "iImageArray" + OpDecorate %uImage DescriptorSet 0 + OpDecorate %uImage Binding 0 + OpDecorate %uImageArray DescriptorSet 0 + OpDecorate %uImageArray Binding 2 + OpDecorate %iImage DescriptorSet 0 + OpDecorate %iImage Binding 1 + OpDecorate %iImageArray DescriptorSet 0 + OpDecorate %iImageArray Binding 3 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %7 = OpTypeImage %uint 2D 0 0 0 2 R32ui +%_ptr_UniformConstant_7 = OpTypePointer UniformConstant %7 + %uImage = OpVariable %_ptr_UniformConstant_7 UniformConstant + %int = OpTypeInt 32 1 + %v2int = OpTypeVector %int 2 + %int_1 = OpConstant %int 1 + %int_5 = OpConstant %int 5 + %14 = 
OpConstantComposite %v2int %int_1 %int_5 + %uint_1 = OpConstant %uint 1 + %uint_0 = OpConstant %uint 0 +%_ptr_Image_uint = OpTypePointer Image %uint + %20 = OpTypeImage %uint 2D 0 1 0 2 R32ui +%_ptr_UniformConstant_20 = OpTypePointer UniformConstant %20 +%uImageArray = OpVariable %_ptr_UniformConstant_20 UniformConstant + %v3int = OpTypeVector %int 3 + %int_4 = OpConstant %int 4 + %25 = OpConstantComposite %v3int %int_1 %int_5 %int_4 + %28 = OpTypeImage %int 2D 0 0 0 2 R32i +%_ptr_UniformConstant_28 = OpTypePointer UniformConstant %28 + %iImage = OpVariable %_ptr_UniformConstant_28 UniformConstant + %int_6 = OpConstant %int 6 + %32 = OpConstantComposite %v2int %int_1 %int_6 +%_ptr_Image_int = OpTypePointer Image %int + %36 = OpTypeImage %int 2D 0 1 0 2 R32i +%_ptr_UniformConstant_36 = OpTypePointer UniformConstant %36 +%iImageArray = OpVariable %_ptr_UniformConstant_36 UniformConstant + %int_9 = OpConstant %int 9 + %40 = OpConstantComposite %v3int %int_1 %int_6 %int_9 + %v3uint = OpTypeVector %uint 3 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpImageTexelPointer %_ptr_Image_uint %uImage %14 %uint_0 + %19 = OpAtomicSMin %uint %18 %uint_1 %uint_0 %uint_1 + %26 = OpImageTexelPointer %_ptr_Image_uint %uImageArray %25 %uint_0 + %27 = OpAtomicSMax %uint %26 %uint_1 %uint_0 %uint_1 + %34 = OpImageTexelPointer %_ptr_Image_int %iImage %32 %uint_0 + %35 = OpAtomicUMin %int %34 %uint_1 %uint_0 %int_1 + %41 = OpImageTexelPointer %_ptr_Image_int %iImageArray %40 %uint_0 + %42 = OpAtomicUMax %int %41 %uint_1 %uint_0 %int_1 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp b/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp new file mode 100644 index 000000000..2eaef4bdb --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/local-size-id-override.asm.comp @@ -0,0 +1,60 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" 
+ OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + %spec_1 = OpSpecConstant %uint 11 + %spec_2 = OpSpecConstant %uint 12 + %spec_3 = OpSpecConstant %uint 13 + %spec_4 = OpSpecConstant %uint 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %uint_3 %spec_1 %spec_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = 
OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %27 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp new file mode 100644 index 000000000..3031f4bb8 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/local-size-id.asm.invalid.comp @@ -0,0 +1,76 @@ + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionModeId %main LocalSizeId %spec_3 %spec_4 %uint_2 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %_runtimearr_v4float ArrayStride 16 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %spec_1 SpecId 1 + OpDecorate %spec_2 SpecId 2 + OpDecorate %spec_3 SpecId 3 + OpDecorate %spec_4 SpecId 4 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 + %v4float = OpTypeVector %float 4 +%_runtimearr_v4float = OpTypeRuntimeArray %v4float + %SSBO = OpTypeStruct %_runtimearr_v4float +%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %float_2 = OpConstant %float 2 +%_ptr_Uniform_v4float = OpTypePointer StorageBuffer %v4float + ; Test that we can declare the 
spec constant as signed. + ; Needs implicit bitcast since WorkGroupSize is uint. + %spec_1 = OpSpecConstant %int 11 + %spec_2 = OpSpecConstant %int 12 + %spec_3 = OpSpecConstant %int 13 + %spec_4 = OpSpecConstant %int 14 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + ; Test that we can build spec constant composites out of local size id values. + ; Needs special case handling. + %spec_3_op = OpSpecConstantOp %uint IAdd %spec_3 %uint_3 +%WorkGroupSize = OpSpecConstantComposite %v3uint %spec_3_op %spec_4 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %20 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %21 = OpLoad %uint %20 + %24 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + %25 = OpLoad %v4float %24 + %26 = OpCompositeConstruct %v4float %float_2 %float_2 %float_2 %float_2 + %27 = OpFAdd %v4float %25 %26 + %wg_f = OpConvertUToF %v3float %WorkGroupSize + %wg_f4 = OpVectorShuffle %v4float %wg_f %wg_f 0 1 2 2 + ; Test that we can use the spec constants directly which needs to translate to gl_WorkGroupSize.elem. + ; Needs special case handling. 
+ %res = OpFAdd %v4float %27 %wg_f4 + %f0 = OpConvertSToF %float %spec_3 + %f1 = OpConvertSToF %float %spec_4 + %f2 = OpConvertSToF %float %uint_2 + %res1 = OpVectorTimesScalar %v4float %res %f0 + %res2 = OpVectorTimesScalar %v4float %res1 %f1 + %res3 = OpVectorTimesScalar %v4float %res2 %f2 + %28 = OpAccessChain %_ptr_Uniform_v4float %_ %int_0 %21 + OpStore %28 %res3 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp b/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp new file mode 100644 index 000000000..126b01e46 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/modf-storage-class.asm.comp @@ -0,0 +1,116 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 91 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "main" + OpExecutionMode %2 LocalSize 1 1 1 + OpDecorate %_arr_v2uint_uint_324 ArrayStride 8 + OpMemberDecorate %_struct_6 0 NonWritable + OpMemberDecorate %_struct_6 0 Offset 0 + OpDecorate %_struct_6 BufferBlock + OpDecorate %7 DescriptorSet 0 + OpDecorate %7 Binding 0 + OpDecorate %_arr_v2float_uint_648 ArrayStride 8 + OpMemberDecorate %_struct_9 0 Offset 0 + OpDecorate %_struct_9 BufferBlock + OpDecorate %11 DescriptorSet 0 + OpDecorate %11 Binding 1 + OpDecorate %_arr_v2float_uint_648_0 ArrayStride 8 + OpMemberDecorate %_struct_13 0 Offset 0 + OpDecorate %_struct_13 BufferBlock + OpDecorate %14 DescriptorSet 0 + OpDecorate %14 Binding 2 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 +%_ptr_Function_v2float = OpTypePointer Function %v2float + %_struct_19 = OpTypeStruct %v2float %v2float + %10 = OpTypeFunction %_struct_19 %_ptr_Function_v2float +%_ptr_Function__struct_19 = OpTypePointer Function %_struct_19 + %uint = OpTypeInt 32 0 +%_ptr_Function_uint = OpTypePointer Function %uint + %uint_0 = OpConstant %uint 0 + 
%uint_648 = OpConstant %uint 648 + %bool = OpTypeBool + %v2uint = OpTypeVector %uint 2 +%_ptr_Function_v2uint = OpTypePointer Function %v2uint + %uint_324 = OpConstant %uint 324 +%_arr_v2uint_uint_324 = OpTypeArray %v2uint %uint_324 + %_struct_6 = OpTypeStruct %_arr_v2uint_uint_324 +%_ptr_Uniform__struct_6 = OpTypePointer Uniform %_struct_6 + %7 = OpVariable %_ptr_Uniform__struct_6 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_2 = OpConstant %uint 2 +%_ptr_Uniform_v2uint = OpTypePointer Uniform %v2uint +%_arr_v2float_uint_648 = OpTypeArray %v2float %uint_648 + %_struct_9 = OpTypeStruct %_arr_v2float_uint_648 +%_ptr_Uniform__struct_9 = OpTypePointer Uniform %_struct_9 + %11 = OpVariable %_ptr_Uniform__struct_9 Uniform + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float +%_arr_v2float_uint_648_0 = OpTypeArray %v2float %uint_648 + %_struct_13 = OpTypeStruct %_arr_v2float_uint_648_0 +%_ptr_Uniform__struct_13 = OpTypePointer Uniform %_struct_13 + %14 = OpVariable %_ptr_Uniform__struct_13 Uniform + %int_1 = OpConstant %int 1 + %2 = OpFunction %void None %3 + %5 = OpLabel + %46 = OpVariable %_ptr_Function_uint Function + %47 = OpVariable %_ptr_Function_v2uint Function + %48 = OpVariable %_ptr_Function_v2float Function + %50 = OpVariable %_ptr_Function__struct_19 Function + OpStore %46 %uint_0 + OpBranch %30 + %30 = OpLabel + OpLoopMerge %32 %33 None + OpBranch %34 + %34 = OpLabel + %35 = OpLoad %uint %46 + %38 = OpULessThan %bool %35 %uint_648 + OpBranchConditional %38 %31 %32 + %31 = OpLabel + %49 = OpLoad %uint %46 + %51 = OpUDiv %uint %49 %uint_2 + %53 = OpAccessChain %_ptr_Uniform_v2uint %7 %int_0 %51 + %54 = OpLoad %v2uint %53 + OpStore %47 %54 + %56 = OpLoad %v2uint %47 + %57 = OpBitcast %v2float %56 + OpStore %48 %57 + %62 = OpLoad %uint %46 + %64 = OpIAdd %uint %62 %uint_1 + %65 = OpLoad %v2float %48 + %66 = OpLoad %uint %46 + %68 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %66 + %69 = OpExtInst 
%v2float %1 Modf %65 %68 + %70 = OpAccessChain %_ptr_Uniform_v2float %11 %int_0 %64 + OpStore %70 %69 + %73 = OpLoad %v2float %48 + %74 = OpExtInst %_struct_19 %1 ModfStruct %73 + OpStore %50 %74 + %79 = OpLoad %uint %46 + %81 = OpAccessChain %_ptr_Function_v2float %50 %int_1 + %82 = OpLoad %v2float %81 + %83 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %79 + OpStore %83 %82 + %84 = OpLoad %uint %46 + %85 = OpIAdd %uint %84 %uint_1 + %86 = OpAccessChain %_ptr_Function_v2float %50 %int_0 + %87 = OpLoad %v2float %86 + %88 = OpAccessChain %_ptr_Uniform_v2float %14 %int_0 %85 + OpStore %88 %87 + OpBranch %33 + %33 = OpLabel + %89 = OpLoad %uint %46 + %90 = OpIAdd %uint %89 %uint_2 + OpStore %46 %90 + OpBranch %30 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp new file mode 100644 index 000000000..8319dfdb6 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/opptrdiff-basic.spv14.invalid.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 59 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 %6 + OpExecutionMode %1 LocalSize 4 1 1 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %8 ArrayStride 16 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %10 ArrayStride 68 + OpDecorate %11 Block + OpMemberDecorate %11 0 Offset 0 + OpDecorate %12 ArrayStride 4 + OpDecorate %13 ArrayStride 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 BuiltIn LocalInvocationId + OpDecorate %6 BuiltIn WorkgroupId + %14 = OpTypeVoid + %15 = OpTypeBool + %16 = OpTypeInt 32 1 + %17 = OpConstant %16 0 + %18 = 
OpConstant %16 1 + %19 = OpConstant %16 4 + %20 = OpConstant %16 16 + %21 = OpConstant %16 17 + %22 = OpTypeVector %16 3 + %23 = OpTypePointer Input %22 + %12 = OpTypeArray %16 %19 + %8 = OpTypeRuntimeArray %12 + %7 = OpTypeStruct %8 + %24 = OpTypePointer StorageBuffer %7 + %25 = OpTypePointer StorageBuffer %12 + %13 = OpTypeArray %16 %21 + %10 = OpTypeRuntimeArray %13 + %9 = OpTypeStruct %10 + %26 = OpTypePointer StorageBuffer %9 + %27 = OpTypePointer StorageBuffer %13 + %28 = OpTypePointer StorageBuffer %16 + %11 = OpTypeStruct %16 + %29 = OpTypePointer Uniform %11 + %30 = OpTypePointer Uniform %16 + %2 = OpVariable %24 StorageBuffer + %3 = OpVariable %26 StorageBuffer + %4 = OpVariable %29 Uniform + %5 = OpVariable %23 Input + %6 = OpVariable %23 Input + %31 = OpTypeFunction %14 + %1 = OpFunction %14 None %31 + %32 = OpLabel + %33 = OpAccessChain %30 %4 %17 + %34 = OpLoad %16 %33 + %35 = OpLoad %22 %6 + %36 = OpCompositeExtract %16 %35 0 + %37 = OpLoad %22 %5 + %38 = OpCompositeExtract %16 %37 0 + %39 = OpAccessChain %25 %2 %17 %17 + %40 = OpAccessChain %25 %2 %17 %36 + %41 = OpSGreaterThanEqual %15 %36 %34 + OpSelectionMerge %42 None + OpBranchConditional %41 %43 %42 + %43 = OpLabel + OpReturn + %42 = OpLabel + %44 = OpIEqual %15 %38 %18 + OpSelectionMerge %45 None + OpBranchConditional %44 %46 %45 + %46 = OpLabel + %47 = OpPtrDiff %16 %40 %39 + %48 = OpAccessChain %28 %3 %17 %36 %20 + OpStore %48 %47 + OpBranch %45 + %45 = OpLabel + %49 = OpPhi %16 %17 %42 %17 %46 %50 %45 + %50 = OpIAdd %16 %49 %18 + %51 = OpIEqual %15 %50 %19 + %52 = OpIMul %16 %38 %19 + %53 = OpIAdd %16 %52 %49 + %54 = OpAccessChain %28 %40 %38 + %55 = OpAccessChain %28 %40 %49 + %56 = OpPtrDiff %16 %54 %55 + %57 = OpAccessChain %28 %3 %17 %36 %53 + OpStore %57 %56 + OpLoopMerge %58 %45 None + OpBranchConditional %51 %58 %45 + %58 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp 
b/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp new file mode 100644 index 000000000..856649195 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/opptrdiff-opptraccesschain-elem-offset.spv14.asm.comp @@ -0,0 +1,79 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 46 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpMemberDecorate %7 1 Offset 4 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %8 ArrayStride 8 + OpDecorate %9 Block + OpMemberDecorate %9 0 Offset 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %10 ArrayStride 4 + %11 = OpTypeVoid + %12 = OpTypeBool + %13 = OpTypeInt 32 1 + %14 = OpConstant %13 -1 + %15 = OpConstant %13 0 + %16 = OpConstant %13 1 + %17 = OpConstant %13 2 + %18 = OpConstant %13 3 + %19 = OpTypeVector %13 2 + %6 = OpTypeRuntimeArray %13 + %7 = OpTypeStruct %13 %6 + %20 = OpTypePointer StorageBuffer %7 + %2 = OpVariable %20 StorageBuffer + %8 = OpTypeRuntimeArray %19 + %9 = OpTypeStruct %8 + %21 = OpTypePointer StorageBuffer %9 + %3 = OpVariable %21 StorageBuffer + %10 = OpTypePointer StorageBuffer %13 + %22 = OpTypePointer Private %10 + %4 = OpVariable %22 Private + %5 = OpVariable %22 Private + %23 = OpTypePointer StorageBuffer %13 + %24 = OpTypePointer StorageBuffer %19 + %25 = OpTypeFunction %11 + %1 = OpFunction %11 None %25 + %26 = OpLabel + %27 = OpAccessChain %23 %2 %15 + %28 = OpLoad %13 %27 + %29 = OpAccessChain %10 %2 %16 %15 + OpStore %4 %29 + %30 = OpPtrAccessChain %10 %29 %28 + OpStore %5 %30 + %31 = OpSLessThanEqual %12 %28 %15 + OpSelectionMerge %32 None + OpBranchConditional %31 %32 %33 + %33 = OpLabel + %34 = OpPhi %13 %15 %26 %35 %33 + %36 = OpLoad %10 %4 + %37 = 
OpLoad %10 %5 + %38 = OpPtrAccessChain %10 %36 %16 + %39 = OpPtrAccessChain %10 %37 %14 + %35 = OpIAdd %13 %34 %16 + OpStore %4 %38 + OpStore %5 %39 + %40 = OpPtrDiff %13 %36 %37 + %41 = OpPtrDiff %13 %37 %36 + %42 = OpCompositeConstruct %19 %40 %41 + %43 = OpAccessChain %24 %3 %15 %34 + OpStore %43 %42 + %44 = OpSGreaterThanEqual %12 %34 %28 + OpLoopMerge %45 %33 None + OpBranchConditional %44 %45 %33 + %45 = OpLabel + OpBranch %32 + %32 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp new file mode 100644 index 000000000..5a97976ce --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/opptrequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = 
OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrEqual %9 %3 %4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp new file mode 100644 index 000000000..89813b226 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; 
Bound: 63 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointersStorageBuffer + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %5 ArrayStride 4 + OpDecorate %6 Block + OpDecorate %7 Block + OpMemberDecorate %6 0 ColMajor + OpMemberDecorate %6 0 Offset 0 + OpMemberDecorate %6 0 MatrixStride 16 + OpMemberDecorate %6 1 RowMajor + OpMemberDecorate %6 1 Offset 64 + OpMemberDecorate %6 1 MatrixStride 16 + OpMemberDecorate %6 2 Offset 128 + OpMemberDecorate %6 3 Offset 132 + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %13 = OpConstant %10 2 + %14 = OpConstant %10 3 + %15 = OpTypeFloat 32 + %5 = OpTypeRuntimeArray %10 + %16 = OpTypeVector %15 4 + %17 = OpTypeMatrix %16 4 + %6 = OpTypeStruct %17 %17 %15 %15 + %7 = OpTypeStruct %5 + %18 = OpTypePointer StorageBuffer %6 + %19 = OpTypePointer StorageBuffer %7 + %20 = OpTypePointer StorageBuffer %17 + %21 = OpTypePointer StorageBuffer %10 + %22 = OpTypePointer StorageBuffer %15 + %23 = OpTypePointer StorageBuffer %16 + %2 = OpVariable %18 StorageBuffer + %3 = OpVariable %18 StorageBuffer + %4 = OpVariable %19 StorageBuffer + %24 = OpTypeFunction %8 + %1 = OpFunction %8 None %24 + %25 = OpLabel + %26 = OpCopyObject %10 %11 + %27 = OpAccessChain %22 %2 %13 + %28 = OpAccessChain %22 %2 %14 + %29 = OpAccessChain %22 %3 %13 + %30 = OpAccessChain %22 %3 %14 + %31 = OpAccessChain %20 %2 %11 + %32 = OpAccessChain %20 %2 %12 + %33 = OpAccessChain %23 %2 %11 %11 + %34 = OpAccessChain %23 %2 %11 %12 + %35 = OpAccessChain %22 %2 %11 %11 %11 + %36 = OpPtrEqual %9 %27 %28 + %37 = OpSelect %10 %36 %11 %12 + %38 = OpAccessChain %21 %4 %11 %26 + %39 = OpIAdd %10 %26 %12 + OpStore %38 %37 + 
%40 = OpPtrEqual %9 %27 %29 + %41 = OpSelect %10 %40 %11 %12 + %42 = OpAccessChain %21 %4 %11 %39 + %43 = OpIAdd %10 %39 %12 + OpStore %42 %41 + %44 = OpSelect %22 %40 %27 %28 + %45 = OpSelect %22 %40 %29 %30 + %46 = OpPtrEqual %9 %44 %45 + %47 = OpSelect %10 %46 %11 %12 + %48 = OpAccessChain %21 %4 %11 %43 + %49 = OpIAdd %10 %43 %12 + OpStore %48 %47 + %50 = OpSelect %22 %46 %27 %28 + %51 = OpPtrEqual %9 %50 %35 + %52 = OpSelect %10 %51 %11 %12 + %53 = OpAccessChain %21 %4 %11 %49 + %54 = OpIAdd %10 %49 %12 + OpStore %53 %52 + %55 = OpPtrEqual %9 %31 %32 + %56 = OpSelect %10 %55 %11 %12 + %57 = OpAccessChain %21 %4 %11 %54 + %58 = OpIAdd %10 %54 %12 + OpStore %57 %56 + %59 = OpPtrEqual %9 %33 %34 + %60 = OpSelect %10 %59 %11 %12 + %61 = OpAccessChain %21 %4 %11 %58 + %62 = OpIAdd %10 %58 %12 + OpStore %61 %56 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp b/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp new file mode 100644 index 000000000..1cbf8045c --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/opptrnotequal-basic.spv14.asm.comp @@ -0,0 +1,96 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 64 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 %3 %4 %5 + OpExecutionMode %1 LocalSize 1 1 1 + OpDecorate %6 ArrayStride 4 + OpDecorate %7 Block + OpMemberDecorate %7 0 Offset 0 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %3 DescriptorSet 0 + OpDecorate %3 Binding 1 + OpDecorate %4 DescriptorSet 0 + OpDecorate %4 Binding 2 + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 3 + %8 = OpTypeVoid + %9 = OpTypeBool + %10 = OpTypeInt 32 0 + %11 = OpConstant %10 0 + %12 = OpConstant %10 1 + %6 = OpTypeRuntimeArray %10 + %7 = OpTypeStruct %6 + %13 = OpTypePointer StorageBuffer %7 + %14 = OpTypePointer StorageBuffer %6 + %15 = OpTypePointer 
StorageBuffer %10 + %2 = OpVariable %13 StorageBuffer + %3 = OpVariable %13 StorageBuffer + %4 = OpVariable %13 StorageBuffer + %5 = OpVariable %13 StorageBuffer + %16 = OpTypeFunction %8 + %1 = OpFunction %8 None %16 + %17 = OpLabel + %18 = OpCopyObject %10 %11 + %19 = OpAccessChain %14 %2 %11 + %20 = OpAccessChain %15 %2 %11 %11 + %21 = OpAccessChain %14 %3 %11 + %22 = OpAccessChain %15 %3 %11 %11 + %23 = OpAccessChain %14 %4 %11 + %24 = OpAccessChain %15 %4 %11 %11 + %25 = OpPtrNotEqual %9 %2 %3 + %26 = OpSelect %10 %25 %12 %11 + %27 = OpAccessChain %15 %5 %11 %18 + %28 = OpIAdd %10 %18 %12 + OpStore %27 %26 + %29 = OpPtrNotEqual %9 %19 %21 + %30 = OpSelect %10 %29 %12 %11 + %31 = OpAccessChain %15 %5 %11 %28 + %32 = OpIAdd %10 %28 %12 + OpStore %31 %30 + %33 = OpPtrNotEqual %9 %20 %22 + %34 = OpSelect %10 %33 %12 %11 + %35 = OpAccessChain %15 %5 %11 %32 + %36 = OpIAdd %10 %32 %12 + OpStore %35 %34 + %37 = OpPtrNotEqual %9 %2 %4 + %38 = OpSelect %10 %37 %12 %11 + %39 = OpAccessChain %15 %5 %11 %36 + %40 = OpIAdd %10 %36 %12 + OpStore %39 %38 + %41 = OpPtrNotEqual %9 %19 %23 + %42 = OpSelect %10 %41 %12 %11 + %43 = OpAccessChain %15 %5 %11 %40 + %44 = OpIAdd %10 %40 %12 + OpStore %43 %42 + %45 = OpPtrNotEqual %9 %20 %24 + %46 = OpSelect %10 %45 %12 %11 + %47 = OpAccessChain %15 %5 %11 %44 + %48 = OpIAdd %10 %44 %12 + OpStore %47 %46 + %49 = OpPtrNotEqual %9 %3 %4 + %50 = OpSelect %10 %49 %12 %11 + %51 = OpAccessChain %15 %5 %11 %48 + %52 = OpIAdd %10 %48 %12 + OpStore %51 %50 + %53 = OpPtrNotEqual %9 %21 %23 + %54 = OpSelect %10 %53 %12 %11 + %55 = OpAccessChain %15 %5 %11 %52 + %56 = OpIAdd %10 %52 %12 + OpStore %55 %54 + %57 = OpPtrNotEqual %9 %22 %24 + %58 = OpSelect %10 %57 %12 %11 + %59 = OpAccessChain %15 %5 %11 %56 + %60 = OpIAdd %10 %56 %12 + OpStore %59 %58 + %61 = OpPtrNotEqual %9 %2 %2 + %62 = OpSelect %10 %61 %12 %11 + %63 = OpAccessChain %15 %5 %11 %60 + OpStore %63 %62 + OpReturn + OpFunctionEnd diff --git 
a/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp b/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp new file mode 100644 index 000000000..298b4e750 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/ptr-access-chain-custom-array-stride.asm.comp @@ -0,0 +1,98 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 66 +; Schema: 0 + OpCapability Shader + OpCapability PhysicalStorageBufferAddresses + OpExtension "SPV_KHR_physical_storage_buffer" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel PhysicalStorageBuffer64 GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_buffer_reference" + OpSourceExtension "GL_EXT_buffer_reference_uvec2" + OpSourceExtension "GL_EXT_scalar_block_layout" + OpName %main "main" + OpName %Registers "Registers" + OpMemberName %Registers 0 "a" + OpMemberName %Registers 1 "b" + OpMemberName %Registers 2 "c" + OpMemberName %Registers 3 "d" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpMemberDecorate %Registers 0 Offset 0 + OpMemberDecorate %Registers 1 Offset 8 + OpMemberDecorate %Registers 2 Offset 16 + OpMemberDecorate %Registers 3 Offset 24 + OpDecorate %Registers Block + OpDecorate %v3float_stride12_ptr ArrayStride 12 + OpDecorate %v3float_stride16_ptr ArrayStride 16 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 + %v2uint = OpTypeVector %uint 2 + %float = OpTypeFloat 32 + %v3float = OpTypeVector %float 3 +%_ptr_PhysicalStorageBuffer_v3float = OpTypePointer PhysicalStorageBuffer %v3float +%v3float_stride12_ptr = OpTypePointer PhysicalStorageBuffer %v3float +%v3float_stride16_ptr = OpTypePointer PhysicalStorageBuffer %v3float +%v3float_stride12_ptr_push = OpTypePointer 
PushConstant %v3float_stride12_ptr +%v3float_stride16_ptr_push = OpTypePointer PushConstant %v3float_stride16_ptr +%v2uint_ptr = OpTypePointer PushConstant %v2uint + %Registers = OpTypeStruct %v3float_stride12_ptr %v3float_stride16_ptr %v2uint %v2uint +%_ptr_PushConstant_Registers = OpTypePointer PushConstant %Registers + %_ = OpVariable %_ptr_PushConstant_Registers PushConstant + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %int_1 = OpConstant %int 1 + %int_2 = OpConstant %int 2 +%_ptr_PushConstant_v2uint = OpTypePointer PushConstant %v2uint + %int_3 = OpConstant %int 3 + %uint_64 = OpConstant %uint 64 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %29 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %index = OpLoad %uint %29 + + %ptr_member_0 = OpAccessChain %v3float_stride12_ptr_push %_ %int_0 + %ptr0 = OpLoad %v3float_stride12_ptr %ptr_member_0 + + %ptr_member_1 = OpAccessChain %v3float_stride16_ptr_push %_ %int_1 + %ptr1 = OpLoad %v3float_stride16_ptr %ptr_member_1 + + %ptr_member_2 = OpAccessChain %v2uint_ptr %_ %int_2 + %ptr2v = OpLoad %v2uint %ptr_member_2 + %ptr2 = OpBitcast %v3float_stride12_ptr %ptr2v + + %ptr_member_3 = OpAccessChain %v2uint_ptr %_ %int_3 + %ptr3v = OpLoad %v2uint %ptr_member_3 + %ptr3 = OpBitcast %v3float_stride16_ptr %ptr3v + + %ptr0_chain = OpPtrAccessChain %v3float_stride12_ptr %ptr0 %index + %ptr1_chain = OpPtrAccessChain %v3float_stride16_ptr %ptr1 %index + %ptr2_chain = OpPtrAccessChain %v3float_stride12_ptr %ptr2 %index + %ptr3_chain = OpPtrAccessChain %v3float_stride16_ptr %ptr3 %index + + %loaded0 = OpLoad %v3float %ptr0_chain Aligned 4 + %loaded1 = OpLoad %v3float %ptr1_chain 
Aligned 16 + %loaded2 = OpLoad %v3float %ptr2_chain Aligned 4 + %loaded3 = OpLoad %v3float %ptr3_chain Aligned 16 + + %added0 = OpFAdd %v3float %loaded0 %loaded1 + %added1 = OpFAdd %v3float %loaded2 %loaded3 + OpStore %ptr0_chain %added0 Aligned 4 + OpStore %ptr2_chain %added1 Aligned 4 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp b/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp new file mode 100644 index 000000000..b4e622bac --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/spec-constant-name-aliasing.asm.comp @@ -0,0 +1,78 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 35 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "values" + OpName %_ "" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpName %A "A" + OpName %B "A" + OpName %C "A" + OpName %D "A" + OpName %E "A" + OpName %F "A" + OpName %G "A" + OpName %H "A" + OpName %I "A" + OpName %J "A" + OpName %K "A" + OpName %L "A" + OpDecorate %_runtimearr_int ArrayStride 4 + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %A SpecId 0 + OpDecorate %B SpecId 1 + OpDecorate %C SpecId 2 + OpDecorate %D SpecId 3 + OpDecorate %E SpecId 4 + OpDecorate %F SpecId 5 + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %int = OpTypeInt 32 1 +%_runtimearr_int = OpTypeRuntimeArray %int + %SSBO = OpTypeStruct %_runtimearr_int +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int_0 = OpConstant %int 0 + 
%uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %A = OpSpecConstant %int 0 + %B = OpSpecConstant %int 1 + %C = OpSpecConstant %int 2 + %D = OpSpecConstant %int 3 + %E = OpSpecConstant %int 4 + %F = OpSpecConstant %int 5 + %G = OpSpecConstantOp %int ISub %A %B + %H = OpSpecConstantOp %int ISub %G %C + %I = OpSpecConstantOp %int ISub %H %D + %J = OpSpecConstantOp %int ISub %I %E + %K = OpSpecConstantOp %int ISub %J %F + %L = OpSpecConstantOp %int IAdd %K %F +%_ptr_Uniform_int = OpTypePointer Uniform %int + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %18 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %19 = OpLoad %uint %18 + %32 = OpAccessChain %_ptr_Uniform_int %_ %int_0 %19 + OpStore %32 %L + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp new file mode 100644 index 000000000..bdf2027a8 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp @@ -0,0 +1,58 @@ +; SPIR-V +; Version: 1.0 +; Generator: Codeplay; 0 +; Bound: 31 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpExtension "SPV_KHR_storage_buffer_storage_class" + OpExtension "SPV_KHR_variable_pointers" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %22 "main" %gl_WorkGroupID + OpSource OpenCL_C 120 + OpDecorate %15 SpecId 0 + ;OpDecorate %16 SpecId 1 + OpDecorate %17 SpecId 2 + OpDecorate %_runtimearr_float ArrayStride 4 + OpMemberDecorate %_struct_4 0 Offset 0 + OpDecorate %_struct_4 Block + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + 
OpDecorate %20 DescriptorSet 0 + OpDecorate %20 Binding 0 + OpDecorate %21 DescriptorSet 0 + OpDecorate %21 Binding 1 + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %size1 = OpConstant %uint 1 +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float +%_runtimearr_float = OpTypeArray %float %size1 ; Runtime arrays do not work yet in MSL. + %_struct_4 = OpTypeStruct %_runtimearr_float +%_ptr_StorageBuffer__struct_4 = OpTypePointer StorageBuffer %_struct_4 + %void = OpTypeVoid + %8 = OpTypeFunction %void + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%_ptr_Input_uint = OpTypePointer Input %uint +%_ptr_Private_v3uint = OpTypePointer Private %v3uint + %uint_0 = OpConstant %uint 0 +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %15 = OpSpecConstant %uint 1 + %16 = OpConstant %uint 2 + %17 = OpSpecConstant %uint 3 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %15 %16 %17 + %19 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize + %20 = OpVariable %_ptr_StorageBuffer__struct_4 StorageBuffer + %21 = OpVariable %_ptr_StorageBuffer__struct_4 StorageBuffer + %22 = OpFunction %void None %8 + %23 = OpLabel + %24 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_0 + %25 = OpLoad %uint %24 + %26 = OpAccessChain %_ptr_StorageBuffer_float %21 %uint_0 %25 + %27 = OpLoad %float %26 + %28 = OpAccessChain %_ptr_StorageBuffer_float %20 %uint_0 %25 + %29 = OpLoad %float %28 + %30 = OpFAdd %float %27 %29 + OpStore %28 %30 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp b/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp new file mode 100644 index 000000000..010d17c20 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp @@ -0,0 +1,63 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 30 +; Schema: 0 + OpCapability Shader + OpCapability 
VariablePointersStorageBuffer + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %copy_out_f1_f1_ "copy_out(f1;f1;" + OpName %A "A" + OpName %B "B" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "a" + OpName %_ "" + OpName %SSBORead "SSBORead" + OpMemberName %SSBORead 0 "b" + OpName %__0 "" + OpMemberDecorate %SSBO 0 NonReadable + OpMemberDecorate %SSBO 0 Offset 0 + OpDecorate %SSBO Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + OpMemberDecorate %SSBORead 0 NonWritable + OpMemberDecorate %SSBORead 0 Offset 0 + OpDecorate %SSBORead Block + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 1 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_StorageBuffer_float = OpTypePointer StorageBuffer %float + %8 = OpTypeFunction %void %_ptr_StorageBuffer_float %_ptr_StorageBuffer_float + %SSBO = OpTypeStruct %float +%_ptr_StorageBuffer_SSBO = OpTypePointer StorageBuffer %SSBO + %_ = OpVariable %_ptr_StorageBuffer_SSBO StorageBuffer + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %SSBORead = OpTypeStruct %float +%_ptr_StorageBuffer_SSBORead = OpTypePointer StorageBuffer %SSBORead + %__0 = OpVariable %_ptr_StorageBuffer_SSBORead StorageBuffer + %main = OpFunction %void None %3 + %5 = OpLabel + %param = OpVariable %_ptr_Function_float Function + %param_0 = OpVariable %_ptr_Function_float Function + %25 = OpAccessChain %_ptr_StorageBuffer_float %_ %int_0 + %26 = OpAccessChain %_ptr_StorageBuffer_float %__0 %int_0 + %27 = OpFunctionCall %void %copy_out_f1_f1_ %25 %26 + OpReturn + OpFunctionEnd +%copy_out_f1_f1_ = OpFunction %void None %8 + %A = OpFunctionParameter %_ptr_StorageBuffer_float + %B = OpFunctionParameter %_ptr_StorageBuffer_float + %12 = OpLabel + %13 = OpLoad %float %B + OpStore %A %13 + OpReturn + 
OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp b/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp new file mode 100644 index 000000000..fca4fff77 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp16.fp16.asm.comp @@ -0,0 +1,225 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 173 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpCapability FloatControls2 + OpExtension "SPV_KHR_16bit_storage" + OpExtension "SPV_KHR_float_controls2" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex + OpExecutionMode %main LocalSize 4 1 1 + OpExecutionModeId %main FPFastMathDefault %half %fp32_modes + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpMemberName %SSBO 1 "f16" + OpName %_ "" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpDecorate %_arr_float_uint_4 ArrayStride 4 + OpDecorate %_arr_half_uint_4 ArrayStride 2 + OpDecorate %SSBO BufferBlock + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %_ Binding 0 + OpDecorate %_ DescriptorSet 0 + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %24 FPFastMathMode NotNaN|NotInf + OpDecorate %30 FPFastMathMode NotNaN ; This isn't enough to avoid precise:: + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %fp32_modes = OpConstant %uint 0x7000f + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 + %half = OpTypeFloat 16 +%_arr_half_uint_4 = OpTypeArray %half %uint_4 + %SSBO = OpTypeStruct %_arr_float_uint_4 %_arr_half_uint_4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = 
OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_4 = OpConstant %float 4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_half = OpTypePointer Uniform %half + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %uint %gl_LocalInvocationIndex + %20 = OpLoad %uint %gl_LocalInvocationIndex + %22 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %20 + %23 = OpLoad %float %22 + %24 = OpExtInst %float %1 Cos %23 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %19 + OpStore %25 %24 + %26 = OpLoad %uint %gl_LocalInvocationIndex + %27 = OpLoad %uint %gl_LocalInvocationIndex + %28 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %27 + %29 = OpLoad %float %28 + %30 = OpExtInst %float %1 Sin %29 + %31 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %26 + %32 = OpLoad %float %31 + %33 = OpFAdd %float %32 %30 + %34 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %26 + OpStore %34 %33 + %35 = OpLoad %uint %gl_LocalInvocationIndex + %36 = OpLoad %uint %gl_LocalInvocationIndex + %37 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %36 + %38 = OpLoad %float %37 + %39 = OpExtInst %float %1 Tan %38 + %40 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %35 + %41 = OpLoad %float %40 + %42 = OpFAdd %float %41 %39 + %43 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %35 + OpStore %43 %42 + %44 = OpLoad %uint %gl_LocalInvocationIndex + %45 = OpLoad %uint %gl_LocalInvocationIndex + %46 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %45 + %47 = OpLoad %float %46 + %48 = OpExtInst %float %1 Acos %47 + %49 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %44 + %50 = OpLoad %float %49 + %51 = OpFAdd %float %50 %48 + %52 = OpAccessChain %_ptr_Uniform_float %_ 
%int_0 %44 + OpStore %52 %51 + %53 = OpLoad %uint %gl_LocalInvocationIndex + %54 = OpLoad %uint %gl_LocalInvocationIndex + %55 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %54 + %56 = OpLoad %float %55 + %57 = OpExtInst %float %1 Asin %56 + %58 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %53 + %59 = OpLoad %float %58 + %60 = OpFAdd %float %59 %57 + %61 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %53 + OpStore %61 %60 + %62 = OpLoad %uint %gl_LocalInvocationIndex + %63 = OpLoad %uint %gl_LocalInvocationIndex + %64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %63 + %65 = OpLoad %float %64 + %66 = OpExtInst %float %1 Atan %65 + %67 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %62 + %68 = OpLoad %float %67 + %69 = OpFAdd %float %68 %66 + %70 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %62 + OpStore %70 %69 + %71 = OpLoad %uint %gl_LocalInvocationIndex + %72 = OpLoad %uint %gl_LocalInvocationIndex + %73 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %72 + %74 = OpLoad %float %73 + %75 = OpExtInst %float %1 Exp %74 + %76 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %71 + %77 = OpLoad %float %76 + %78 = OpFAdd %float %77 %75 + %79 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %71 + OpStore %79 %78 + %80 = OpLoad %uint %gl_LocalInvocationIndex + %81 = OpLoad %uint %gl_LocalInvocationIndex + %82 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %81 + %83 = OpLoad %float %82 + %84 = OpExtInst %float %1 Exp2 %83 + %85 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %80 + %86 = OpLoad %float %85 + %87 = OpFAdd %float %86 %84 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %80 + OpStore %88 %87 + %89 = OpLoad %uint %gl_LocalInvocationIndex + %90 = OpLoad %uint %gl_LocalInvocationIndex + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %90 + %92 = OpLoad %float %91 + %93 = OpExtInst %float %1 Log %92 + %94 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %89 + %95 = OpLoad %float %94 + %96 = OpFAdd %float %95 %93 + %97 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %89 + 
OpStore %97 %96 + %98 = OpLoad %uint %gl_LocalInvocationIndex + %99 = OpLoad %uint %gl_LocalInvocationIndex + %100 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %99 + %101 = OpLoad %float %100 + %102 = OpExtInst %float %1 Log2 %101 + %103 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %98 + %104 = OpLoad %float %103 + %105 = OpFAdd %float %104 %102 + %106 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %98 + OpStore %106 %105 + %107 = OpLoad %uint %gl_LocalInvocationIndex + %108 = OpLoad %uint %gl_LocalInvocationIndex + %109 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %108 + %110 = OpLoad %float %109 + %111 = OpExtInst %float %1 Sqrt %110 + %112 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %107 + %113 = OpLoad %float %112 + %114 = OpFAdd %float %113 %111 + %115 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %107 + OpStore %115 %114 + %116 = OpLoad %uint %gl_LocalInvocationIndex + %117 = OpLoad %uint %gl_LocalInvocationIndex + %118 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %117 + %119 = OpLoad %float %118 + %120 = OpExtInst %float %1 InverseSqrt %119 + %121 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %116 + %122 = OpLoad %float %121 + %123 = OpFAdd %float %122 %120 + %124 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %116 + OpStore %124 %123 + %125 = OpLoad %uint %gl_LocalInvocationIndex + %126 = OpLoad %uint %gl_LocalInvocationIndex + %127 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %126 + %128 = OpLoad %float %127 + %130 = OpExtInst %float %1 Pow %128 %float_4 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %125 + %132 = OpLoad %float %131 + %133 = OpFAdd %float %132 %130 + %134 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %125 + OpStore %134 %133 + %136 = OpLoad %uint %gl_LocalInvocationIndex + %137 = OpLoad %uint %gl_LocalInvocationIndex + %139 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %137 + %140 = OpLoad %half %139 + %141 = OpExtInst %half %1 Cos %140 + %142 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %136 + OpStore %142 %141 + %143 = OpLoad 
%uint %gl_LocalInvocationIndex + %144 = OpLoad %uint %gl_LocalInvocationIndex + %145 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %144 + %146 = OpLoad %half %145 + %147 = OpExtInst %half %1 Sin %146 + %148 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %143 + %149 = OpLoad %half %148 + %150 = OpFAdd %half %149 %147 + %151 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %143 + OpStore %151 %150 + %152 = OpLoad %uint %gl_LocalInvocationIndex + %153 = OpLoad %uint %gl_LocalInvocationIndex + %154 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %153 + %155 = OpLoad %half %154 + %156 = OpExtInst %half %1 Cosh %155 + %157 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %152 + %158 = OpLoad %half %157 + %159 = OpFAdd %half %158 %156 + %160 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %152 + OpStore %160 %159 + %161 = OpLoad %uint %gl_LocalInvocationIndex + %162 = OpLoad %uint %gl_LocalInvocationIndex + %163 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %162 + %164 = OpLoad %half %163 + %165 = OpExtInst %half %1 Sinh %164 + %166 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %161 + %167 = OpLoad %half %166 + %168 = OpFAdd %half %167 %165 + %169 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %161 + OpStore %169 %168 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp b/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp new file mode 100644 index 000000000..c95c72ddb --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/trancendentals-float-controls-2-fp32.fp16.asm.comp @@ -0,0 +1,224 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 173 +; Schema: 0 + OpCapability Shader + OpCapability Float16 + OpCapability StorageBuffer16BitAccess + OpCapability FloatControls2 + OpExtension "SPV_KHR_16bit_storage" + OpExtension "SPV_KHR_float_controls2" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" 
%gl_LocalInvocationIndex + OpExecutionMode %main LocalSize 4 1 1 + OpExecutionModeId %main FPFastMathDefault %float %fp32_modes + OpSource GLSL 450 + OpName %main "main" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "v" + OpMemberName %SSBO 1 "f16" + OpName %_ "" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpDecorate %_arr_float_uint_4 ArrayStride 4 + OpDecorate %_arr_half_uint_4 ArrayStride 2 + OpDecorate %SSBO BufferBlock + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 16 + OpDecorate %_ Binding 0 + OpDecorate %_ DescriptorSet 0 + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %24 FPFastMathMode None + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %fp32_modes = OpConstant %uint 0x7000f + %uint_4 = OpConstant %uint 4 +%_arr_float_uint_4 = OpTypeArray %float %uint_4 + %half = OpTypeFloat 16 +%_arr_half_uint_4 = OpTypeArray %half %uint_4 + %SSBO = OpTypeStruct %_arr_float_uint_4 %_arr_half_uint_4 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input +%_ptr_Uniform_float = OpTypePointer Uniform %float + %float_4 = OpConstant %float 4 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_half = OpTypePointer Uniform %half + %v3uint = OpTypeVector %uint 3 + %uint_1 = OpConstant %uint 1 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %19 = OpLoad %uint %gl_LocalInvocationIndex + %20 = OpLoad %uint %gl_LocalInvocationIndex + %22 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %20 + %23 = OpLoad %float %22 + %24 = OpExtInst %float %1 Cos %23 + %25 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %19 + OpStore %25 %24 + %26 = OpLoad %uint 
%gl_LocalInvocationIndex + %27 = OpLoad %uint %gl_LocalInvocationIndex + %28 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %27 + %29 = OpLoad %float %28 + %30 = OpExtInst %float %1 Sin %29 + %31 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %26 + %32 = OpLoad %float %31 + %33 = OpFAdd %float %32 %30 + %34 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %26 + OpStore %34 %33 + %35 = OpLoad %uint %gl_LocalInvocationIndex + %36 = OpLoad %uint %gl_LocalInvocationIndex + %37 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %36 + %38 = OpLoad %float %37 + %39 = OpExtInst %float %1 Tan %38 + %40 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %35 + %41 = OpLoad %float %40 + %42 = OpFAdd %float %41 %39 + %43 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %35 + OpStore %43 %42 + %44 = OpLoad %uint %gl_LocalInvocationIndex + %45 = OpLoad %uint %gl_LocalInvocationIndex + %46 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %45 + %47 = OpLoad %float %46 + %48 = OpExtInst %float %1 Acos %47 + %49 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %44 + %50 = OpLoad %float %49 + %51 = OpFAdd %float %50 %48 + %52 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %44 + OpStore %52 %51 + %53 = OpLoad %uint %gl_LocalInvocationIndex + %54 = OpLoad %uint %gl_LocalInvocationIndex + %55 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %54 + %56 = OpLoad %float %55 + %57 = OpExtInst %float %1 Asin %56 + %58 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %53 + %59 = OpLoad %float %58 + %60 = OpFAdd %float %59 %57 + %61 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %53 + OpStore %61 %60 + %62 = OpLoad %uint %gl_LocalInvocationIndex + %63 = OpLoad %uint %gl_LocalInvocationIndex + %64 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %63 + %65 = OpLoad %float %64 + %66 = OpExtInst %float %1 Atan %65 + %67 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %62 + %68 = OpLoad %float %67 + %69 = OpFAdd %float %68 %66 + %70 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %62 + OpStore %70 %69 + %71 = OpLoad %uint 
%gl_LocalInvocationIndex + %72 = OpLoad %uint %gl_LocalInvocationIndex + %73 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %72 + %74 = OpLoad %float %73 + %75 = OpExtInst %float %1 Exp %74 + %76 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %71 + %77 = OpLoad %float %76 + %78 = OpFAdd %float %77 %75 + %79 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %71 + OpStore %79 %78 + %80 = OpLoad %uint %gl_LocalInvocationIndex + %81 = OpLoad %uint %gl_LocalInvocationIndex + %82 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %81 + %83 = OpLoad %float %82 + %84 = OpExtInst %float %1 Exp2 %83 + %85 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %80 + %86 = OpLoad %float %85 + %87 = OpFAdd %float %86 %84 + %88 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %80 + OpStore %88 %87 + %89 = OpLoad %uint %gl_LocalInvocationIndex + %90 = OpLoad %uint %gl_LocalInvocationIndex + %91 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %90 + %92 = OpLoad %float %91 + %93 = OpExtInst %float %1 Log %92 + %94 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %89 + %95 = OpLoad %float %94 + %96 = OpFAdd %float %95 %93 + %97 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %89 + OpStore %97 %96 + %98 = OpLoad %uint %gl_LocalInvocationIndex + %99 = OpLoad %uint %gl_LocalInvocationIndex + %100 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %99 + %101 = OpLoad %float %100 + %102 = OpExtInst %float %1 Log2 %101 + %103 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %98 + %104 = OpLoad %float %103 + %105 = OpFAdd %float %104 %102 + %106 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %98 + OpStore %106 %105 + %107 = OpLoad %uint %gl_LocalInvocationIndex + %108 = OpLoad %uint %gl_LocalInvocationIndex + %109 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %108 + %110 = OpLoad %float %109 + %111 = OpExtInst %float %1 Sqrt %110 + %112 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %107 + %113 = OpLoad %float %112 + %114 = OpFAdd %float %113 %111 + %115 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %107 + OpStore %115 %114 + 
%116 = OpLoad %uint %gl_LocalInvocationIndex + %117 = OpLoad %uint %gl_LocalInvocationIndex + %118 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %117 + %119 = OpLoad %float %118 + %120 = OpExtInst %float %1 InverseSqrt %119 + %121 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %116 + %122 = OpLoad %float %121 + %123 = OpFAdd %float %122 %120 + %124 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %116 + OpStore %124 %123 + %125 = OpLoad %uint %gl_LocalInvocationIndex + %126 = OpLoad %uint %gl_LocalInvocationIndex + %127 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %126 + %128 = OpLoad %float %127 + %130 = OpExtInst %float %1 Pow %128 %float_4 + %131 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %125 + %132 = OpLoad %float %131 + %133 = OpFAdd %float %132 %130 + %134 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %125 + OpStore %134 %133 + %136 = OpLoad %uint %gl_LocalInvocationIndex + %137 = OpLoad %uint %gl_LocalInvocationIndex + %139 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %137 + %140 = OpLoad %half %139 + %141 = OpExtInst %half %1 Cos %140 + %142 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %136 + OpStore %142 %141 + %143 = OpLoad %uint %gl_LocalInvocationIndex + %144 = OpLoad %uint %gl_LocalInvocationIndex + %145 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %144 + %146 = OpLoad %half %145 + %147 = OpExtInst %half %1 Sin %146 + %148 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %143 + %149 = OpLoad %half %148 + %150 = OpFAdd %half %149 %147 + %151 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %143 + OpStore %151 %150 + %152 = OpLoad %uint %gl_LocalInvocationIndex + %153 = OpLoad %uint %gl_LocalInvocationIndex + %154 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %153 + %155 = OpLoad %half %154 + %156 = OpExtInst %half %1 Cosh %155 + %157 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %152 + %158 = OpLoad %half %157 + %159 = OpFAdd %half %158 %156 + %160 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %152 + OpStore %160 %159 + %161 = OpLoad %uint 
%gl_LocalInvocationIndex + %162 = OpLoad %uint %gl_LocalInvocationIndex + %163 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %162 + %164 = OpLoad %half %163 + %165 = OpExtInst %half %1 Sinh %164 + %166 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %161 + %167 = OpLoad %half %166 + %168 = OpFAdd %half %167 %165 + %169 = OpAccessChain %_ptr_Uniform_half %_ %int_1 %161 + OpStore %169 %168 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp new file mode 100644 index 000000000..9c2afe393 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp @@ -0,0 +1,71 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpExtension "SPV_KHR_variable_pointers" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %test "test" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %uint_64 = OpConstant %uint 64 +%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64 +%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64 + %test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = 
OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %21 = OpConvertUToF %float %20 + %22 = OpCompositeConstruct %v2float %21 %21 + + ; Dummy expression. *(&test + 0) + %ptr0 = OpPtrAccessChain %_ptr_Workgroup__arr_v2float_uint_64 %test %uint_0 + %ptr1 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr0 %uint_0 %uint_1 + %ptr2 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr1 %uint_2 + OpStore %ptr2 %22 + + ; Chain PtrAccessChain while keeping pointer type. + %ptr3 = OpPtrAccessChain %_ptr_Workgroup_v2float %test %uint_0 %uint_1 + %ptr4 = OpPtrAccessChain %_ptr_Workgroup_v2float %ptr3 %uint_2 + OpStore %ptr4 %22 + + ; Same semantics. + %ptr5 = OpPtrAccessChain %_ptr_Workgroup_v2float %test %uint_0 %uint_3 + OpStore %ptr5 %22 + + ; Scalar shenanigans. 
+ %ptr6 = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0 + %ptr7 = OpPtrAccessChain %_ptr_Workgroup_float %ptr6 %uint_1 + OpStore %ptr7 %21 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp new file mode 100644 index 000000000..79ff08edc --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpExtension "SPV_KHR_variable_pointers" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %test "test" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %bool = OpTypeBool + %true = OpConstantTrue %bool + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %uint_64 = OpConstant %uint 64 +%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64 +%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64 + %test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 
1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %21 = OpConvertUToF %float %20 + %22 = OpCompositeConstruct %v2float %21 %21 + + ; Scalar shenanigans. + %ptr6 = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0 + %ptr6_alt = OpPtrAccessChain %_ptr_Workgroup_float %test %uint_0 %uint_2 %uint_0 + %ptr6_sel = OpSelect %_ptr_Workgroup_float %true %ptr6 %ptr6_alt + %ptr7 = OpPtrAccessChain %_ptr_Workgroup_float %ptr6_sel %uint_1 + OpStore %ptr7 %21 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp new file mode 100644 index 000000000..c4512858a --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp @@ -0,0 +1,60 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 11 +; Bound: 26 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + OpExtension "SPV_KHR_variable_pointers" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_LocalInvocationIndex %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 64 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %test "test" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %gl_GlobalInvocationID "gl_GlobalInvocationID" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = 
OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %bool = OpTypeBool + %true = OpConstantTrue %bool + %v2float = OpTypeVector %float 2 + %uint = OpTypeInt 32 0 + %uint_64 = OpConstant %uint 64 +%_arr_v2float_uint_64 = OpTypeArray %v2float %uint_64 +%_ptr_Workgroup__arr_v2float_uint_64 = OpTypePointer Workgroup %_arr_v2float_uint_64 + %test = OpVariable %_ptr_Workgroup__arr_v2float_uint_64 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float +%_ptr_Workgroup_v2float = OpTypePointer Workgroup %v2float +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_64 %uint_1 %uint_1 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %19 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %20 = OpLoad %uint %19 + %21 = OpConvertUToF %float %20 + %22 = OpCompositeConstruct %v2float %21 %21 + + %a = OpAccessChain %_ptr_Workgroup_v2float %test %uint_1 + %b = OpAccessChain %_ptr_Workgroup_v2float %test %uint_2 + %c = OpSelect %_ptr_Workgroup_v2float %true %a %b + + %d = OpAccessChain %_ptr_Workgroup_float %c %uint_1 + OpStore %d %21 + + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp new file mode 100644 index 000000000..ba6267cc0 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-pointers.asm.invalid.comp @@ -0,0 +1,152 @@ +; SPIR-V +; Version: 1.3 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 89 +; Schema: 0 + OpCapability Shader + OpCapability VariablePointers + %1 = 
OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %foo "foo" + OpMemberName %foo 0 "a" + OpMemberName %foo 1 "b" + OpMemberName %foo 2 "c" + OpName %bar "bar" + OpMemberName %bar 0 "d" + OpName %baz "baz" + OpMemberName %baz 0 "e" + OpName %buf "buf" + OpName %buf2 "buf2" + OpName %cb "cb" + OpName %tgsm "tgsm" + OpName %sbuf "sbuf" + OpName %sbuf2 "sbuf2" + OpName %stgsm "stgsm" + OpName %select_buffer "select_buffer" + OpName %select_buffer_null "select_buffer_null" + OpName %select_tgsm "select_tgsm" + OpName %cur "cur" + OpMemberDecorate %foo 0 Offset 0 + OpMemberDecorate %foo 1 Offset 512 + OpMemberDecorate %foo 2 Offset 520 + OpMemberDecorate %bar 0 Offset 0 + OpMemberDecorate %baz 0 Offset 0 + OpDecorate %foo Block + OpDecorate %bar Block + OpDecorate %baz Block + OpDecorate %buf DescriptorSet 0 + OpDecorate %buf Binding 0 + OpDecorate %cb DescriptorSet 0 + OpDecorate %cb Binding 3 + OpDecorate %buf2 DescriptorSet 0 + OpDecorate %buf2 Binding 4 + OpDecorate %_ptr_Workgroup_int ArrayStride 4 + OpDecorate %_ptr_StorageBuffer_int ArrayStride 4 + OpDecorate %_arr_int_uint_128 ArrayStride 4 + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + %void = OpTypeVoid + %22 = OpTypeFunction %void + %int = OpTypeInt 32 1 + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %uint_128 = OpConstant %uint 128 +%_arr_int_uint_128 = OpTypeArray %int %uint_128 + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %foo = OpTypeStruct %_arr_int_uint_128 %uint %v2float +%_ptr_StorageBuffer_foo = OpTypePointer StorageBuffer %foo + %buf = OpVariable %_ptr_StorageBuffer_foo StorageBuffer + %bar = OpTypeStruct %int +%_ptr_Uniform_bar = OpTypePointer Uniform %bar + %cb = OpVariable 
%_ptr_Uniform_bar Uniform + %baz = OpTypeStruct %_arr_int_uint_128 +%_ptr_StorageBuffer_baz = OpTypePointer StorageBuffer %baz + %buf2 = OpVariable %_ptr_StorageBuffer_baz StorageBuffer +%_ptr_Workgroup__arr_int_uint_128 = OpTypePointer Workgroup %_arr_int_uint_128 + %tgsm = OpVariable %_ptr_Workgroup__arr_int_uint_128 Workgroup +%_ptr_StorageBuffer_int = OpTypePointer StorageBuffer %int +%_ptr_Private__ptr_StorageBuffer_int = OpTypePointer Private %_ptr_StorageBuffer_int + %sbuf = OpVariable %_ptr_Private__ptr_StorageBuffer_int Private + %sbuf2 = OpVariable %_ptr_Private__ptr_StorageBuffer_int Private +%_ptr_Workgroup_int = OpTypePointer Workgroup %int +%_ptr_Private__ptr_Workgroup_int = OpTypePointer Private %_ptr_Workgroup_int + %stgsm = OpVariable %_ptr_Private__ptr_Workgroup_int Private + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool +%_ptr_Uniform_int = OpTypePointer Uniform %int + %44 = OpTypeFunction %_ptr_StorageBuffer_int + %int_0 = OpConstant %int 0 + %uint_1 = OpConstant %uint 1 + %47 = OpConstantNull %_ptr_StorageBuffer_int + %48 = OpTypeFunction %_ptr_Workgroup_int + %49 = OpConstantNull %_ptr_Workgroup_int +%_ptr_Function__ptr_Workgroup_int = OpTypePointer Function %_ptr_Workgroup_int +%select_buffer = OpFunction %_ptr_StorageBuffer_int None %44 + %51 = OpLabel + %52 = OpAccessChain %_ptr_Uniform_int %cb %uint_0 + %53 = OpLoad %int %52 + %54 = OpINotEqual %bool %53 %int_0 + %55 = OpAccessChain %_ptr_StorageBuffer_int %buf %uint_0 %uint_0 + %56 = OpAccessChain %_ptr_StorageBuffer_int %buf2 %uint_0 %uint_0 + %57 = OpSelect %_ptr_StorageBuffer_int %54 %55 %56 + OpReturnValue %57 + OpFunctionEnd +%select_buffer_null = OpFunction %_ptr_StorageBuffer_int None %44 + %58 = OpLabel + %59 = OpAccessChain %_ptr_Uniform_int %cb %uint_0 + %60 = OpLoad %int %59 + %61 = OpINotEqual %bool %60 %int_0 + %62 = OpAccessChain %_ptr_StorageBuffer_int %buf %uint_0 %uint_0 + %63 = OpSelect %_ptr_StorageBuffer_int %61 %62 %47 + OpReturnValue %63 + OpFunctionEnd 
+%select_tgsm = OpFunction %_ptr_Workgroup_int None %48 + %64 = OpLabel + %65 = OpAccessChain %_ptr_Uniform_int %cb %uint_0 + %66 = OpLoad %int %65 + %67 = OpINotEqual %bool %66 %int_0 + %68 = OpAccessChain %_ptr_Workgroup_int %tgsm %uint_0 + %69 = OpSelect %_ptr_Workgroup_int %67 %68 %49 + OpReturnValue %69 + OpFunctionEnd + %main = OpFunction %void None %22 + %70 = OpLabel + %cur = OpVariable %_ptr_Function__ptr_Workgroup_int Function + %71 = OpFunctionCall %_ptr_StorageBuffer_int %select_buffer + OpStore %sbuf %71 + %72 = OpFunctionCall %_ptr_StorageBuffer_int %select_buffer_null + OpStore %sbuf2 %72 + %73 = OpFunctionCall %_ptr_Workgroup_int %select_tgsm + OpStore %stgsm %73 + %74 = OpAccessChain %_ptr_StorageBuffer_int %buf %uint_0 %uint_0 + %75 = OpLoad %_ptr_Workgroup_int %stgsm + %76 = OpCopyObject %_ptr_Workgroup_int %75 + OpStore %cur %76 + OpBranch %77 + %77 = OpLabel + %78 = OpPhi %_ptr_StorageBuffer_int %74 %70 %79 %80 + %81 = OpLoad %_ptr_Workgroup_int %cur + %82 = OpLoad %int %78 + %83 = OpINotEqual %bool %82 %int_0 + OpLoopMerge %85 %80 None + OpBranchConditional %83 %84 %85 + %84 = OpLabel + %86 = OpLoad %int %81 + %87 = OpIAdd %int %82 %86 + OpStore %78 %87 + OpStore %81 %87 + OpBranch %80 + %80 = OpLabel + %79 = OpPtrAccessChain %_ptr_StorageBuffer_int %78 %uint_1 + %88 = OpPtrAccessChain %_ptr_Workgroup_int %81 %uint_1 + OpStore %cur %88 + OpBranch %77 + %85 = OpLabel + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp new file mode 100644 index 000000000..0cb791703 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-ssbo-argument.spv16.asm.comp @@ -0,0 +1,44 @@ +; SPIR-V +; Version: 1.6 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 24 +; Schema: 0 + OpCapability VariablePointersStorageBuffer + OpCapability Int8 + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 + OpExecutionMode 
%1 LocalSize 16 1 1 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %_struct_3 Block + OpMemberDecorate %_struct_3 0 Offset 0 + OpDecorate %_runtimearr_uchar ArrayStride 1 + OpDecorate %_ptr_StorageBuffer_uchar ArrayStride 1 + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uchar = OpTypeInt 8 0 + %uchar_0 = OpConstant %uchar 0 +%_runtimearr_uchar = OpTypeRuntimeArray %uchar + %_struct_3 = OpTypeStruct %_runtimearr_uchar +%_ptr_StorageBuffer_uchar = OpTypePointer StorageBuffer %uchar +%_ptr_StorageBuffer__struct_3 = OpTypePointer StorageBuffer %_struct_3 + %2 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %15 = OpTypeFunction %void %_ptr_StorageBuffer_uchar + %16 = OpTypeFunction %void + %1 = OpFunction %void None %16 + %17 = OpLabel + %18 = OpAccessChain %_ptr_StorageBuffer_uchar %2 %uint_0 %uint_1 + %19 = OpFunctionCall %void %20 %18 + OpReturn + OpFunctionEnd + %20 = OpFunction %void None %15 + %21 = OpFunctionParameter %_ptr_StorageBuffer_uchar + %22 = OpLabel + %23 = OpPtrAccessChain %_ptr_StorageBuffer_uchar %21 %uint_2 + OpStore %23 %uchar_0 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp new file mode 100644 index 000000000..8dd687ca9 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp @@ -0,0 +1,45 @@ +; SPIR-V +; Version: 1.6 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 24 +; Schema: 0 + OpCapability VariablePointersStorageBuffer + OpCapability Int8 + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %1 "main" %2 + OpExecutionMode %1 LocalSize 16 1 1 + OpDecorate %2 DescriptorSet 0 + OpDecorate %2 Binding 0 + OpDecorate %_struct_3 Block + OpMemberDecorate %_struct_3 0 Offset 
0 + OpDecorate %uchar_array ArrayStride 1 + %void = OpTypeVoid + %uint = OpTypeInt 32 0 + %uint_0 = OpConstant %uint 0 + %uint_1 = OpConstant %uint 1 + %uint_2 = OpConstant %uint 2 + %uint_3 = OpConstant %uint 3 + %uint_16 = OpConstant %uint 16 + %uchar = OpTypeInt 8 0 + %uchar_0 = OpConstant %uchar 0 +%uchar_array = OpTypeArray %uchar %uint_16 + %_struct_3 = OpTypeStruct %uchar_array +%_ptr_StorageBuffer_uchar = OpTypePointer StorageBuffer %uchar +%_ptr_StorageBuffer_uchar_array = OpTypePointer StorageBuffer %uchar_array +%_ptr_StorageBuffer__struct_3 = OpTypePointer StorageBuffer %_struct_3 + %2 = OpVariable %_ptr_StorageBuffer__struct_3 StorageBuffer + %15 = OpTypeFunction %void %_ptr_StorageBuffer_uchar_array + %16 = OpTypeFunction %void + %1 = OpFunction %void None %16 + %17 = OpLabel + %18 = OpAccessChain %_ptr_StorageBuffer_uchar_array %2 %uint_0 + %19 = OpFunctionCall %void %20 %18 + OpReturn + OpFunctionEnd + %20 = OpFunction %void None %15 + %21 = OpFunctionParameter %_ptr_StorageBuffer_uchar_array + %22 = OpLabel + %23 = OpAccessChain %_ptr_StorageBuffer_uchar %21 %uint_2 + OpStore %23 %uchar_0 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp new file mode 100644 index 000000000..ed4e10446 --- /dev/null +++ b/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp @@ -0,0 +1,214 @@ +; SPIR-V +; Version: 1.0 +; Generator: Google Clspv; 0 +; Bound: 175 +; Schema: 0 + OpCapability Shader + OpCapability Int8 + OpCapability VariablePointers + OpExtension "SPV_KHR_storage_buffer_storage_class" + OpExtension "SPV_KHR_variable_pointers" + OpExtension "SPV_KHR_non_semantic_info" + %163 = OpExtInstImport "NonSemantic.ClspvReflection.5" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %32 "main" %gl_LocalInvocationID %gl_WorkGroupID + 
OpSource OpenCL_C 120 + %164 = OpString "main" + %165 = OpString " __kernel" + %167 = OpString "out_data" + %170 = OpString "pix_in_block" + OpDecorate %gl_LocalInvocationID BuiltIn LocalInvocationId + OpDecorate %gl_WorkGroupID BuiltIn WorkgroupId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpDecorate %_runtimearr_v4uint ArrayStride 16 + OpMemberDecorate %_struct_23 0 Offset 0 + OpDecorate %_struct_23 Block + OpMemberDecorate %_struct_26 0 Offset 0 + OpMemberDecorate %_struct_27 0 Offset 0 + OpDecorate %_struct_27 Block + OpDecorate %25 DescriptorSet 0 + OpDecorate %25 Binding 0 + OpDecorate %_arr_uint_uint_256 ArrayStride 4 + OpDecorate %_arr_uchar_uint_1024 ArrayStride 1 + OpDecorate %15 SpecId 0 + OpDecorate %16 SpecId 1 + OpDecorate %17 SpecId 2 + %uint = OpTypeInt 32 0 + %uint_256 = OpConstant %uint 256 +%_arr_uint_uint_256 = OpTypeArray %uint %uint_256 +%_ptr_Workgroup__arr_uint_uint_256 = OpTypePointer Workgroup %_arr_uint_uint_256 + %uchar = OpTypeInt 8 0 + %uint_1024 = OpConstant %uint 1024 +%_arr_uchar_uint_1024 = OpTypeArray %uchar %uint_1024 +%_ptr_Workgroup__arr_uchar_uint_1024 = OpTypePointer Workgroup %_arr_uchar_uint_1024 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %15 = OpSpecConstant %uint 1 + %16 = OpSpecConstant %uint 1 + %17 = OpSpecConstant %uint 1 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %15 %16 %17 +%_ptr_Private_v3uint = OpTypePointer Private %v3uint + %v4uint = OpTypeVector %uint 4 +%_runtimearr_v4uint = OpTypeRuntimeArray %v4uint + %_struct_23 = OpTypeStruct %_runtimearr_v4uint +%_ptr_StorageBuffer__struct_23 = OpTypePointer StorageBuffer %_struct_23 + %_struct_26 = OpTypeStruct %uint + %_struct_27 = OpTypeStruct %_struct_26 +%_ptr_PushConstant__struct_27 = OpTypePointer PushConstant %_struct_27 + %void = OpTypeVoid + %31 = OpTypeFunction %void +%_ptr_PushConstant__struct_26 = OpTypePointer PushConstant %_struct_26 + %uint_0 = OpConstant %uint 0 + %bool = OpTypeBool 
+%_ptr_Input_uint = OpTypePointer Input %uint + %uint_1 = OpConstant %uint 1 + %uint_255 = OpConstant %uint 255 +%_ptr_Workgroup_uint = OpTypePointer Workgroup %uint + %uint_2 = OpConstant %uint 2 + %uint_10 = OpConstant %uint 10 + %uint_1020 = OpConstant %uint 1020 + %v4uchar = OpTypeVector %uchar 4 +%_ptr_Workgroup_uchar = OpTypePointer Workgroup %uchar + %uint_3 = OpConstant %uint 3 + %uint_264 = OpConstant %uint 264 +%_ptr_StorageBuffer_v4uint = OpTypePointer StorageBuffer %v4uint + %137 = OpUndef %v4uchar + %uint_4 = OpConstant %uint 4 + %5 = OpVariable %_ptr_Workgroup__arr_uint_uint_256 Workgroup + %10 = OpVariable %_ptr_Workgroup__arr_uchar_uint_1024 Workgroup +%gl_LocalInvocationID = OpVariable %_ptr_Input_v3uint Input +%gl_WorkGroupID = OpVariable %_ptr_Input_v3uint Input + %20 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize + %25 = OpVariable %_ptr_StorageBuffer__struct_23 StorageBuffer + %29 = OpVariable %_ptr_PushConstant__struct_27 PushConstant + %32 = OpFunction %void None %31 + %33 = OpLabel + %36 = OpAccessChain %_ptr_PushConstant__struct_26 %29 %uint_0 + %37 = OpLoad %_struct_26 %36 + %38 = OpCompositeExtract %uint %37 0 + %40 = OpINotEqual %bool %38 %uint_0 + OpSelectionMerge %105 None + OpBranchConditional %40 %43 %105 + %43 = OpLabel + %45 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_0 + %46 = OpLoad %uint %45 + %48 = OpAccessChain %_ptr_Input_uint %gl_WorkGroupID %uint_1 + %49 = OpLoad %uint %48 + %50 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_0 + %51 = OpLoad %uint %50 + %52 = OpAccessChain %_ptr_Input_uint %gl_LocalInvocationID %uint_1 + %53 = OpLoad %uint %52 + %54 = OpIMul %uint %53 %51 + %55 = OpUDiv %uint %54 %49 + %57 = OpUMod %uint %55 %uint_255 + %58 = OpUConvert %uchar %57 + OpBranch %60 + %60 = OpLabel + %61 = OpPhi %uint %100 %99 %uint_0 %43 + %62 = OpIMul %uint %61 %38 + OpLoopMerge %103 %99 None + OpBranch %65 + %65 = OpLabel + %66 = OpPhi %uint %93 %65 %uint_0 %60 + %67 = OpIAdd %uint %66 
%62 + %68 = OpIMul %uint %66 %61 + %69 = OpIAdd %uint %46 %68 + %71 = OpAccessChain %_ptr_Workgroup_uint %5 %67 + OpStore %71 %69 + %72 = OpIAdd %uint %49 %68 + %74 = OpShiftLeftLogical %uint %67 %uint_2 + %76 = OpShiftRightLogical %uint %74 %uint_10 + %78 = OpBitwiseAnd %uint %74 %uint_1020 + %80 = OpBitcast %v4uchar %72 + %81 = OpCompositeExtract %uchar %80 1 + %82 = OpCompositeExtract %uchar %80 2 + %83 = OpCompositeExtract %uchar %80 3 + %85 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %76 %78 + %86 = OpBitwiseOr %uint %78 %uint_1 + %87 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %76 %86 + OpStore %87 %81 + %88 = OpBitwiseOr %uint %78 %uint_2 + %89 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %76 %88 + OpStore %89 %82 + %91 = OpBitwiseOr %uint %78 %uint_3 + %92 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %76 %91 + OpStore %92 %83 + OpStore %85 %58 + %93 = OpIAdd %uint %66 %uint_1 + %94 = OpUGreaterThanEqual %bool %93 %38 + OpLoopMerge %97 %65 None + OpBranchConditional %94 %97 %65 + %97 = OpLabel + OpBranch %99 + %99 = OpLabel + %100 = OpIAdd %uint %61 %uint_1 + %101 = OpUGreaterThanEqual %bool %100 %38 + OpBranchConditional %101 %103 %60 + %103 = OpLabel + OpBranch %105 + %105 = OpLabel + OpBranch %107 + %107 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpSelectionMerge %162 None + OpBranchConditional %40 %111 %162 + %111 = OpLabel + %112 = OpPhi %uint %157 %156 %uint_0 %107 + %113 = OpIMul %uint %112 %38 + OpLoopMerge %160 %156 None + OpBranch %116 + %116 = OpLabel + %117 = OpPhi %uint %150 %116 %uint_0 %111 + %118 = OpIAdd %uint %117 %113 + %120 = OpAccessChain %_ptr_StorageBuffer_v4uint %25 %uint_0 %118 + %121 = OpAccessChain %_ptr_Workgroup_uint %5 %118 + %122 = OpLoad %uint %121 + %123 = OpShiftLeftLogical %uint %118 %uint_2 + %124 = OpShiftRightLogical %uint %123 %uint_10 + %125 = OpBitwiseAnd %uint %123 %uint_1020 + %126 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %124 %125 + %127 = OpLoad %uchar %126 + %128 = OpBitwiseOr %uint %125 
%uint_1 + %129 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %124 %128 + %130 = OpLoad %uchar %129 + %131 = OpBitwiseOr %uint %125 %uint_2 + %132 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %124 %131 + %133 = OpLoad %uchar %132 + %134 = OpBitwiseOr %uint %125 %uint_3 + %135 = OpPtrAccessChain %_ptr_Workgroup_uchar %10 %124 %134 + %136 = OpLoad %uchar %135 + %138 = OpCompositeInsert %v4uchar %127 %137 0 + %139 = OpCompositeInsert %v4uchar %130 %138 1 + %140 = OpCompositeInsert %v4uchar %133 %139 2 + %141 = OpCompositeInsert %v4uchar %136 %140 3 + %142 = OpBitcast %uint %141 + %143 = OpIAdd %uint %122 %142 + %144 = OpLoad %v4uint %120 + %145 = OpCompositeInsert %v4uint %143 %144 0 + %146 = OpShiftRightLogical %uint %143 %uint_2 + %147 = OpCompositeInsert %v4uint %146 %145 1 + %148 = OpShiftRightLogical %uint %143 %uint_3 + %149 = OpCompositeInsert %v4uint %148 %147 3 + OpStore %120 %149 + %150 = OpIAdd %uint %117 %uint_1 + %151 = OpUGreaterThanEqual %bool %150 %38 + OpLoopMerge %154 %116 None + OpBranchConditional %151 %154 %116 + %154 = OpLabel + OpBranch %156 + %156 = OpLabel + %157 = OpIAdd %uint %112 %uint_1 + %158 = OpUGreaterThanEqual %bool %157 %38 + OpBranchConditional %158 %160 %111 + %160 = OpLabel + OpBranch %162 + %162 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + OpReturn + OpFunctionEnd + %166 = OpExtInst %void %163 Kernel %32 %164 %uint_2 %uint_0 %165 + %168 = OpExtInst %void %163 ArgumentInfo %167 + %169 = OpExtInst %void %163 ArgumentStorageBuffer %166 %uint_0 %uint_0 %uint_0 %168 + %171 = OpExtInst %void %163 ArgumentInfo %170 + %173 = OpExtInst %void %163 ArgumentPodPushConstant %166 %uint_1 %uint_0 %uint_4 %171 + %174 = OpExtInst %void %163 SpecConstantWorkgroupSize %uint_0 %uint_1 %uint_2 diff --git a/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp 
b/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp new file mode 100644 index 000000000..f5f05a1ae --- /dev/null +++ b/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp @@ -0,0 +1,9 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require + +layout (binding = 0) readonly buffer A {float data_a[];} a[]; +layout (binding = 0) writeonly buffer D {float data_d[];} d[]; + +void main() { + d[gl_WorkGroupID.x].data_d[0] = a[gl_WorkGroupID.x].data_a[0]; +} diff --git a/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp b/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp new file mode 100644 index 000000000..081c39626 --- /dev/null +++ b/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 8) in; + +shared float shared_group[8][8]; +shared float shared_group_alt[8][8]; + +void main() +{ + float blob[8]; + for (int i = 0; i < 8; i++) + blob[i] = float(i); + shared_group[gl_LocalInvocationIndex] = blob; + + barrier(); + + float copied_blob[8] = shared_group[gl_LocalInvocationIndex ^ 1u]; + shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex]; +} diff --git a/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp b/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp new file mode 100644 index 000000000..f54fc5f52 --- /dev/null +++ b/shaders-opencl-no-opt/comp/atomic-cmpxchg-packed-vector.invalid.comp @@ -0,0 +1,17 @@ +#version 460 +#extension GL_EXT_nonuniform_qualifier : enable +#extension GL_EXT_scalar_block_layout : require + +layout(scalar, binding=1) restrict buffer AttData0 { + uvec3 att0[]; +}; + +void main() { + uint newVal = 432; + uint prevVal = 0; + uint curVal = 0; + + while ( (curVal = atomicCompSwap(att0[0].x, prevVal, newVal)) != 
prevVal) + { + } +}; diff --git a/shaders-opencl-no-opt/comp/basic.invalid.comp b/shaders-opencl-no-opt/comp/basic.invalid.comp new file mode 100644 index 000000000..abf100f49 --- /dev/null +++ b/shaders-opencl-no-opt/comp/basic.invalid.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +layout(set = 0, binding = 0) uniform Foo +{ + int a; + int b; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int c; + int d; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int e; + int f; +} baz[3 * 3 * 2]; + +void main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x + coords.y + coords.z].e = a + c; + baz[coords.x + coords.y + coords.z].f = b * d; +} diff --git a/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp b/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp new file mode 100644 index 000000000..eb7ae42fa --- /dev/null +++ b/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp @@ -0,0 +1,34 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(local_size_x = 1) in; + +layout(buffer_reference) buffer Ptr +{ + uint i; + uvec2 i2; +}; + +layout(push_constant, std430) uniform Registers +{ + Ptr ptr; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + Ptr ptr_ubo; +}; + +layout(set = 0, binding = 1) readonly buffer SSBO +{ + Ptr ptr_ssbo; +}; + +void main() +{ + atomicAdd(ptr.i, 10u); + atomicAdd(ptr_ubo.i, 11u); + atomicAdd(ptr_ssbo.i, 12u); + atomicAdd(Ptr(ptr.i2).i, 13u); +} diff --git a/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp b/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp new file mode 100644 index 000000000..82f069249 --- /dev/null +++ b/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp @@ -0,0 +1,20 @@ +#version 460 +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + 
+layout(buffer_reference) buffer SSBO +{ + float data[]; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + SSBO ptrs[2]; +}; + +void main() +{ + SSBO s0 = ptrs[0]; + s0.data[gl_GlobalInvocationID.x] += 1.0; +} diff --git a/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp b/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp new file mode 100644 index 000000000..61a6585fb --- /dev/null +++ b/shaders-opencl-no-opt/comp/bda-nonwritable-glslang-workaround.comp @@ -0,0 +1,22 @@ +#version 460 +#extension GL_EXT_buffer_reference_uvec2 : require + +layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + + +layout(push_constant) uniform Registers +{ + uvec2 bda; +}; + +// glslang emits NonWritable on the member, but forgets to actually validate that, +// meaning we cannot trust NonWritable on BDA. +layout(buffer_reference) readonly buffer SSBO +{ + float data[]; +}; + +void main() +{ + SSBO(bda).data[gl_GlobalInvocationID.x] = 0.0; +} diff --git a/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp b/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp new file mode 100644 index 000000000..8f1d97861 --- /dev/null +++ b/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp @@ -0,0 +1,18 @@ +#version 450 +#extension GL_EXT_buffer_reference : require + +layout(buffer_reference) buffer Ref +{ + vec4 v; +}; + +layout(push_constant) uniform Registers +{ + Ref foo; +}; + +void main() +{ + restrict Ref ref = foo; + ref.v = vec4(1.0); +} diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp new file mode 100644 index 000000000..0c21cda30 --- /dev/null +++ b/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp @@ -0,0 +1,23 @@ +#version 450 core +#extension GL_AMD_gpu_shader_half_float : require +#extension GL_AMD_gpu_shader_int16 : require +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer 
SSBO0 +{ + i16vec4 inputs[]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + ivec4 outputs[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + f16vec2 a = int16BitsToFloat16(inputs[ident].xy); + outputs[ident].x = int(packFloat2x16(a + f16vec2(1, 1))); + outputs[ident].y = packInt2x16(inputs[ident].zw); + outputs[ident].z = int(packUint2x16(u16vec2(inputs[ident].xy))); +} diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp new file mode 100644 index 000000000..6bb662412 --- /dev/null +++ b/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp @@ -0,0 +1,26 @@ +#version 450 core +#extension GL_AMD_gpu_shader_half_float : require +#extension GL_AMD_gpu_shader_int16 : require +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO0 +{ + ivec4 inputs[]; +}; + +layout(binding = 1, std430) buffer SSBO1 +{ + i16vec4 outputs[]; +}; + +layout(binding = 2) uniform UBO +{ + f16vec4 const0; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + outputs[ident].xy = unpackInt2x16(inputs[ident].x) + float16BitsToInt16(const0.xy); + outputs[ident].zw = i16vec2(unpackUint2x16(uint(inputs[ident].y)) - float16BitsToUint16(const0.zw)); +} diff --git a/shaders-opencl-no-opt/comp/bitfield.comp b/shaders-opencl-no-opt/comp/bitfield.comp new file mode 100644 index 000000000..0cac0b257 --- /dev/null +++ b/shaders-opencl-no-opt/comp/bitfield.comp @@ -0,0 +1,23 @@ +#version 310 es + +void main() +{ + int signed_value = 0; + uint unsigned_value = 0u; + + int s = bitfieldExtract(signed_value, 5, 20); + uint u = bitfieldExtract(unsigned_value, 6, 21); + s = bitfieldInsert(s, 40, 5, 4); + u = bitfieldInsert(u, 60u, 5, 4); + + u = bitfieldReverse(u); + s = bitfieldReverse(s); + + int v0 = bitCount(u); + int v1 = bitCount(s); + + int v2 = findMSB(u); + int v3 = findMSB(s); + int v4 = findLSB(u); + int v5 = findLSB(s); +} diff --git 
a/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp b/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp new file mode 100644 index 000000000..56c11bbb7 --- /dev/null +++ b/shaders-opencl-no-opt/comp/buffer-device-address-from-pointer-complex-chain.comp @@ -0,0 +1,21 @@ +#version 460 + +#extension GL_EXT_buffer_reference: enable +#extension GL_EXT_buffer_reference_uvec2: enable + +struct S { + vec3 v; +}; + +layout(buffer_reference) buffer SSBO{ + S s[]; +}; + +layout(push_constant) uniform PC { + uvec2 ptr; +} pc; + +void main(){ + SSBO ssbo = SSBO(pc.ptr); + ssbo.s[0].v = vec3(1.0); +} diff --git a/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp b/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp new file mode 100644 index 000000000..ce730ba64 --- /dev/null +++ b/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp @@ -0,0 +1,69 @@ +#version 460 + +#extension GL_KHR_memory_scope_semantics : enable + +layout(local_size_x = 64) in; + +shared uint var; + +void testAdd() +{ + atomicAdd(var, 1); +} + +void testMin() +{ + atomicMin(var, 2); +} + +void testMax() +{ + atomicMax(var, 3); +} + +void testAnd() +{ + atomicAnd(var, 4); +} + +void testOr() +{ + atomicOr(var, 5); +} + +void testXor() +{ + atomicXor(var, 6); +} + +void testExchange() +{ + atomicExchange(var, 7); +} + +void testCompSwap() +{ + atomicCompSwap(var, 8, 9); +} + +void testStore() +{ + atomicStore(var, 10u, gl_ScopeDevice, 0, gl_SemanticsRelaxed); +} + +void foo() +{ + testAdd(); + testMin(); + testMax(); + testOr(); + testXor(); + testExchange(); + testCompSwap(); + testStore(); +} + +void main() +{ + foo(); +} diff --git a/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp b/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp new file mode 100644 index 000000000..2fe074df7 --- /dev/null +++ 
b/shaders-opencl-no-opt/comp/global-invocation-id-writable-ssbo-in-function.comp @@ -0,0 +1,12 @@ +#version 450 +layout(set = 0, binding = 0) buffer myBlock { + int a; + float b[1]; +} myStorage; +float getB() { + return myStorage.b[gl_GlobalInvocationID.x]; +} +void main() { + myStorage.a = (myStorage.a + 1) % 256; + myStorage.b[gl_GlobalInvocationID.x] = mod((getB() + 0.02), 1.0); +} diff --git a/shaders-opencl-no-opt/comp/glsl.std450.comp b/shaders-opencl-no-opt/comp/glsl.std450.comp new file mode 100644 index 000000000..a17a82b82 --- /dev/null +++ b/shaders-opencl-no-opt/comp/glsl.std450.comp @@ -0,0 +1,129 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(binding = 0, std430) buffer SSBO +{ + float res; + int ires; + uint ures; + + vec4 f32; + ivec4 s32; + uvec4 u32; + + mat2 m2; + mat3 m3; + mat4 m4; +}; + +void main() +{ + float tmp; + vec2 v2; + vec3 v3; + vec4 v4; + int itmp; + + res = round(f32.x); + res = roundEven(f32.x); + res = trunc(f32.x); + res = abs(f32.x); + ires = abs(s32.x); + res = sign(f32.x); + ires = sign(s32.x); + res = floor(f32.x); + res = ceil(f32.x); + res = fract(f32.x); + res = radians(f32.x); + res = degrees(f32.x); + res = sin(f32.x); + res = cos(f32.x); + res = tan(f32.x); + res = asin(f32.x); + res = acos(f32.x); + res = atan(f32.x); + res = sinh(f32.x); + res = cosh(f32.x); + res = tanh(f32.x); + res = asinh(f32.x); + res = acosh(f32.x); + res = atanh(f32.x); + res = atan(f32.x, f32.y); + res = pow(f32.x, f32.y); + res = exp(f32.x); + res = log(f32.x); + res = exp2(f32.x); + res = log2(f32.x); + res = sqrt(f32.x); + res = inversesqrt(f32.x); + + res = length(f32.x); + res = distance(f32.x, f32.y); + res = normalize(f32.x); + res = faceforward(f32.x, f32.y, f32.z); + res = reflect(f32.x, f32.y); + res = refract(f32.x, f32.y, f32.z); + + res = length(f32.xy); + res = distance(f32.xy, f32.zw); + v2 = normalize(f32.xy); + v2 = faceforward(f32.xy, f32.yz, f32.zw); + v2 = reflect(f32.xy, f32.zw); + v2 = refract(f32.xy, f32.yz, 
f32.w); + + v3 = cross(f32.xyz, f32.yzw); + + res = determinant(m2); + res = determinant(m3); + res = determinant(m4); + m2 = inverse(m2); + m3 = inverse(m3); + m4 = inverse(m4); + + res = modf(f32.x, tmp); + // ModfStruct + + res = min(f32.x, f32.y); + ures = min(u32.x, u32.y); + ires = min(s32.x, s32.y); + res = max(f32.x, f32.y); + ures = max(u32.x, u32.y); + ires = max(s32.x, s32.y); + + res = clamp(f32.x, f32.y, f32.z); + ures = clamp(u32.x, u32.y, u32.z); + ires = clamp(s32.x, s32.y, s32.z); + + res = mix(f32.x, f32.y, f32.z); + res = step(f32.x, f32.y); + res = smoothstep(f32.x, f32.y, f32.z); + res = fma(f32.x, f32.y, f32.z); + + res = frexp(f32.x, itmp); + // FrexpStruct + res = ldexp(f32.x, itmp); + + ures = packSnorm4x8(f32); + ures = packUnorm4x8(f32); + ures = packSnorm2x16(f32.xy); + ures = packUnorm2x16(f32.xy); + ures = packHalf2x16(f32.xy); + // packDouble2x32 + + v2 = unpackSnorm2x16(u32.x); + v2 = unpackUnorm2x16(u32.x); + v2 = unpackHalf2x16(u32.x); + v4 = unpackSnorm4x8(u32.x); + v4 = unpackUnorm4x8(u32.x); + // unpackDouble2x32 + + s32 = findLSB(s32); + s32 = findLSB(u32); + s32 = findMSB(s32); + s32 = findMSB(u32); + + // interpolateAtSample + // interpolateAtOffset + + // NMin, NMax, NClamp +} diff --git a/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp b/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp new file mode 100644 index 000000000..f7a8787d3 --- /dev/null +++ b/shaders-opencl-no-opt/comp/illegal-struct-name.asm.comp @@ -0,0 +1,62 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 8 +; Bound: 31 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %main "main" + OpExecutionMode %main LocalSize 1 1 1 + OpSource GLSL 450 + OpName %main "main" + OpName %Foo "Foo" + OpMemberName %Foo 0 "abs" + OpName %f "f" + OpName %Foo_0 "Foo" + OpMemberName %Foo_0 0 "abs" + OpName %SSBO "SSBO" + OpMemberName %SSBO 0 "foo" + 
OpMemberName %SSBO 1 "foo2" + OpName %_ "" + OpName %linear "abs" + OpMemberDecorate %Foo_0 0 Offset 0 + OpMemberDecorate %SSBO 0 Offset 0 + OpMemberDecorate %SSBO 1 Offset 4 + OpDecorate %SSBO BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %Foo = OpTypeStruct %float +%_ptr_Function_Foo = OpTypePointer Function %Foo + %Foo_0 = OpTypeStruct %float + %SSBO = OpTypeStruct %Foo_0 %Foo_0 +%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO + %_ = OpVariable %_ptr_Uniform_SSBO Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0 +%_ptr_Function_float = OpTypePointer Function %float +%_ptr_Function_int = OpTypePointer Function %int + %int_10 = OpConstant %int 10 + %int_1 = OpConstant %int 1 +%_ptr_Uniform_float = OpTypePointer Uniform %float + %main = OpFunction %void None %3 + %5 = OpLabel + %f = OpVariable %_ptr_Function_Foo Function + %linear = OpVariable %_ptr_Function_int Function + %17 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0 + %18 = OpLoad %Foo_0 %17 + %19 = OpCompositeExtract %float %18 0 + %21 = OpAccessChain %_ptr_Function_float %f %int_0 + OpStore %21 %19 + OpStore %linear %int_10 + %26 = OpLoad %Foo %f + %27 = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1 + %28 = OpCompositeExtract %float %26 0 + %30 = OpAccessChain %_ptr_Uniform_float %27 %int_0 + OpStore %30 %28 + OpReturn + OpFunctionEnd diff --git a/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp b/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp new file mode 100644 index 000000000..a0ee95b3a --- /dev/null +++ b/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp @@ -0,0 +1,85 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(set = 0, binding = 0) buffer BUF0 +{ + 
f16vec2 f16s; + u16vec2 u16; + i16vec2 i16; + u16vec4 u16s; + i16vec4 i16s; + float16_t f16; +}; + +void test_i16() +{ + f16 += int16BitsToFloat16(i16.x + i16.y); + f16 += int16BitsToFloat16(i16.x - i16.y); + f16 += int16BitsToFloat16(i16.x * i16.y); + f16 += int16BitsToFloat16(i16.x / i16.y); + f16 += int16BitsToFloat16(i16.x % i16.y); + f16 += int16BitsToFloat16(i16.x << i16.y); + f16 += int16BitsToFloat16(i16.x >> i16.y); + f16 += int16BitsToFloat16(~i16.x); + f16 += int16BitsToFloat16(-i16.x); + f16 += int16BitsToFloat16(i16.x ^ i16.y); + f16 += int16BitsToFloat16(i16.x & i16.y); + f16 += int16BitsToFloat16(i16.x | i16.y); +} + +void test_u16() +{ + f16 += uint16BitsToFloat16(u16.x + u16.y); + f16 += uint16BitsToFloat16(u16.x - u16.y); + f16 += uint16BitsToFloat16(u16.x * u16.y); + f16 += uint16BitsToFloat16(u16.x / u16.y); + f16 += uint16BitsToFloat16(u16.x % u16.y); + f16 += uint16BitsToFloat16(u16.x << u16.y); + f16 += uint16BitsToFloat16(u16.x >> u16.y); + f16 += uint16BitsToFloat16(~u16.x); + f16 += uint16BitsToFloat16(-u16.x); + f16 += uint16BitsToFloat16(u16.x ^ u16.y); + f16 += uint16BitsToFloat16(u16.x & u16.y); + f16 += uint16BitsToFloat16(u16.x | u16.y); +} + +void test_u16s() +{ + f16s += uint16BitsToFloat16(u16s.xy + u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy - u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy * u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy / u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy % u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy << u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy >> u16s.zw); + f16s += uint16BitsToFloat16(~u16s.xy); + f16s += uint16BitsToFloat16(-u16s.xy); + f16s += uint16BitsToFloat16(u16s.xy ^ u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy & u16s.zw); + f16s += uint16BitsToFloat16(u16s.xy | u16s.zw); +} + +void test_i16s() +{ + f16s += int16BitsToFloat16(i16s.xy + i16s.zw); + f16s += int16BitsToFloat16(i16s.xy - i16s.zw); + f16s += int16BitsToFloat16(i16s.xy * i16s.zw); + f16s += 
int16BitsToFloat16(i16s.xy / i16s.zw); + f16s += int16BitsToFloat16(i16s.xy % i16s.zw); + f16s += int16BitsToFloat16(i16s.xy << i16s.zw); + f16s += int16BitsToFloat16(i16s.xy >> i16s.zw); + f16s += int16BitsToFloat16(~i16s.xy); + f16s += int16BitsToFloat16(-i16s.xy); + f16s += int16BitsToFloat16(i16s.xy ^ i16s.zw); + f16s += int16BitsToFloat16(i16s.xy & i16s.zw); + f16s += int16BitsToFloat16(i16s.xy | i16s.zw); +} + +void main() +{ + test_u16(); + test_i16(); + test_u16s(); + test_i16s(); +} diff --git a/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp b/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp new file mode 100644 index 000000000..c1b345266 --- /dev/null +++ b/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float16_t a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float16_t b; +}; + +void main() +{ + int16_t v = float16BitsToInt16(b); + v ^= 0x8000s; + a = int16BitsToFloat16(v); +} diff --git a/shaders-opencl-no-opt/comp/int64.invalid.comp b/shaders-opencl-no-opt/comp/int64.invalid.comp new file mode 100644 index 000000000..965bed4ae --- /dev/null +++ b/shaders-opencl-no-opt/comp/int64.invalid.comp @@ -0,0 +1,65 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require +layout(local_size_x = 1) in; + +struct M0 +{ + int64_t v; + i64vec2 b[2]; + uint64_t c; + uint64_t d[5]; +}; + +struct SSBO0_Type +{ + i64vec4 a; + M0 m0; +}; + +struct SSBO1_Type +{ + u64vec4 b; + M0 m0; +}; + +struct SSBO2_Type +{ + int64_t a[4]; + i64vec2 b[4]; +}; + +struct SSBO3_Type +{ + int64_t a[4]; + i64vec2 b[4]; +}; + +layout(set = 0, binding = 0, std430) buffer SSBO +{ + int s32; + uint u32; +}; + +void main() +{ + SSBO0_Type ssbo_0; + SSBO1_Type ssbo_1; + SSBO2_Type ssbo_2; + 
SSBO3_Type ssbo_3; + + ssbo_0.a += i64vec4(10, 20, 30, 40); + ssbo_1.b += u64vec4(999999999999999999ul, 8888888888888888ul, 77777777777777777ul, 6666666666666666ul); + ssbo_0.a += 20; + ssbo_0.a = abs(ssbo_0.a + i64vec4(ssbo_1.b)); + + ssbo_0.a++; + ssbo_1.b++; + ssbo_0.a--; + ssbo_1.b--; + + ssbo_2.a[0] += 1l; + ssbo_3.a[0] += 2l; + + s32 = int(ssbo_0.a.x + ssbo_1.b.y + ssbo_2.a[1] + ssbo_3.a[2]); + u32 = uint(ssbo_0.a.y + ssbo_1.b.z + ssbo_2.a[0] + ssbo_3.a[1]); +} diff --git a/shaders-opencl-no-opt/comp/int64min-literal.comp b/shaders-opencl-no-opt/comp/int64min-literal.comp new file mode 100644 index 000000000..792960544 --- /dev/null +++ b/shaders-opencl-no-opt/comp/int64min-literal.comp @@ -0,0 +1,21 @@ +#version 450 +#extension GL_ARB_gpu_shader_int64 : require + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + int64_t v = int64_t(floatBitsToInt(b)); + v ^= 0x8000000000000000L; + a = intBitsToFloat(int(v)); +} diff --git a/shaders-opencl-no-opt/comp/integer-dot-product.comp b/shaders-opencl-no-opt/comp/integer-dot-product.comp new file mode 100644 index 000000000..8b6630922 --- /dev/null +++ b/shaders-opencl-no-opt/comp/integer-dot-product.comp @@ -0,0 +1,114 @@ +#version 450 +#extension GL_EXT_shader_8bit_storage : require +#extension GL_EXT_shader_16bit_storage : require +#extension GL_EXT_shader_explicit_arithmetic_types : require +#extension GL_EXT_spirv_intrinsics : require + +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer InOut { + uvec4 x; + uvec4 y; + int result; +} comp; + +layout(std430, binding = 1) buffer InOut2 { + uint x; + uint y; + uint result; +} comp2; + +layout(std430, binding = 1) buffer InOut3 { + u16vec4 x; + u16vec4 y; + int acc; + int result; +} comp3; + +// Signed integer dot with unsigned integer +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 
4450) +int sdot_int_result(u16vec4 x, u16vec4 y); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4450) +uint sdot_uint_result(u16vec4 x, u16vec4 y); + +// Unsigned integer dot with signed integer. Only unsigned result is allowed in SPIR-V. +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4451) +uint udot_uint_result(u16vec4 x, u16vec4 y); + +// Mixed integer dot with unsigned integer +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +int sudot_int_result(u16vec4 x, u16vec4 y); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +uint sudot_uint_result(u16vec4 x, u16vec4 y); + +// Signed packed dot product with different output widths. +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4450) +uint8_t spdot_to_8(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4450) +uint16_t spdot_to_16(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4450) +uint spdot_to_32(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4450) +int spdot_to_i32(uint x, uint y, spirv_literal uint packedFormat); + +// Unsigned packed dot product with different output widths. 
+spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4451) +uint8_t updot_to_8(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4451) +uint16_t updot_to_16(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4451) +uint updot_to_32(uint x, uint y, spirv_literal uint packedFormat); + +// Mixed packed dot product with different output widths. +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +uint8_t supdot_to_8(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +uint16_t supdot_to_16(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +uint supdot_to_32(uint x, uint y, spirv_literal uint packedFormat); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4452) +int supdot_to_i32(uint x, uint y, spirv_literal uint packedFormat); + +// SDotAccSat with unsigned input and result type +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4453) +int sdotaddsat_int_result(u16vec4 x, u16vec4 y, int acc); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4453) +uint sdotaddsat_uint_result(u16vec4 x, u16vec4 y, int acc); + +// UDotAccSat. Result type must be unsigned in SPIR-V. 
+spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4454) +uint udotaddsat(u16vec4 x, u16vec4 y, int acc); + +// SUDotAccSat +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4455) +int sudotaddsat_int_result(u16vec4 x, u16vec4 y, int acc); +spirv_instruction (extensions = ["SPV_KHR_integer_dot_product"], capabilities = [6019], id = 4455) +uint sudotaddsat_uint_result(u16vec4 x, u16vec4 y, int acc); + +void main() { + int sdot_int = sdot_int_result(comp3.x, comp3.y); + uint sdot_uint = sdot_uint_result(comp3.x, comp3.y); + uint udot_uint = udot_uint_result(comp3.x, comp3.y); + int sudot_int = sudot_int_result(comp3.x, comp3.y); + uint sudot_uint = sudot_uint_result(comp3.x, comp3.y); + + uint8_t spdot8 = spdot_to_8(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint16_t spdot16 = spdot_to_16(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint spdot32 = spdot_to_32(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + int spdoti32 = spdot_to_i32(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + + uint8_t updot8 = updot_to_8(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint16_t updot16 = updot_to_16(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint updot32 = updot_to_32(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + + uint8_t supdot8 = supdot_to_8(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint16_t supdot16 = supdot_to_16(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + uint supdot32 = supdot_to_32(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + int supdoti32 = supdot_to_i32(comp2.x, comp2.y, 0x0); // PackedVectorFormat4x8Bit + + int sdotaddsat_int = sdotaddsat_int_result(comp3.x, comp3.y, comp3.acc); + uint sdotaddsat_uint = sdotaddsat_uint_result(comp3.x, comp3.y, comp3.acc); + uint udotaddsat_uint = udotaddsat(comp3.x, comp3.y, comp3.acc); + int sudotaddsat_int = sudotaddsat_int_result(comp3.x, comp3.y, comp3.acc); 
+ uint sudotaddsat_uint = sudotaddsat_uint_result(comp3.x, comp3.y, comp3.acc); +} diff --git a/shaders-opencl-no-opt/comp/intmin-literal.comp b/shaders-opencl-no-opt/comp/intmin-literal.comp new file mode 100644 index 000000000..ee35cedab --- /dev/null +++ b/shaders-opencl-no-opt/comp/intmin-literal.comp @@ -0,0 +1,18 @@ +#version 450 + +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1) buffer SSBO +{ + float a; +}; + +layout(set = 0, binding = 0) uniform UBO +{ + float b; +}; + +void main() +{ + a = intBitsToFloat(floatBitsToInt(b) ^ 0x80000000); +} diff --git a/shaders-opencl-no-opt/comp/loop.comp b/shaders-opencl-no-opt/comp/loop.comp new file mode 100644 index 000000000..6d6c32424 --- /dev/null +++ b/shaders-opencl-no-opt/comp/loop.comp @@ -0,0 +1,98 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 0) readonly buffer SSBO +{ + mat4 mvp; + vec4 in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + vec4 idat = in_data[ident]; + + int k = 0; + uint i = 0u; + + if (idat.y == 20.0) + { + do + { + k = k * 2; + i++; + } while (i < ident); + } + + switch (k) + { + case 10: + for (;;) + { + i++; + if (i > 10u) + break; + } + break; + + default: + for (;;) + { + i += 2u; + if (i > 20u) + break; + } + break; + } + + while (k < 10) + { + idat *= 2.0; + k++; + } + + for (uint i = 0u; i < 16u; i++, k++) + for (uint j = 0u; j < 30u; j++) + idat = mvp * idat; + + k = 0; + for (;;) + { + k++; + if (k > 10) + { + k += 2; + } + else + { + k += 3; + continue; + } + + k += 10; + } + + k = 0; + do + { + k++; + } while (k > 10); + + int l = 0; + for (;; l++) + { + if (l == 5) + { + continue; + } + + idat += 1.0; + } + out_data[ident] = idat; +} + diff --git a/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp b/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp new file mode 100644 index 000000000..39d4c38a8 --- 
/dev/null +++ b/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp @@ -0,0 +1,17 @@ +#version 450 +#extension GL_EXT_buffer_reference : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 1, r32ui) volatile uniform uimage2D img; + +layout(set = 0, binding = 0) buffer SSBO +{ + uint val; +}; + +void main() +{ + //imageAtomicAdd(img, ivec2(10), 40); + val = imageLoad(img, ivec2(10)).x; +} + diff --git a/shaders-opencl-no-opt/comp/return.comp b/shaders-opencl-no-opt/comp/return.comp new file mode 100644 index 000000000..617f43718 --- /dev/null +++ b/shaders-opencl-no-opt/comp/return.comp @@ -0,0 +1,33 @@ +#version 310 es +layout(local_size_x = 1) in; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + vec4 out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + + if (ident == 2u) + { + out_data[ident] = vec4(20.0); + } + else if (ident == 4u) + { + out_data[ident] = vec4(10.0); + return; + } + + for (int i = 0; i < 20; i++) + { + if (i == 10) + break; + + return; + } + + out_data[ident] = vec4(10.0); +} + diff --git a/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp b/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp new file mode 100644 index 000000000..0db56342c --- /dev/null +++ b/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp @@ -0,0 +1,13 @@ +#version 450 +#extension GL_EXT_nonuniform_qualifier : require +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + vec4 a; +} ssbos[]; + +void main() +{ + ssbos[gl_WorkGroupID.x].a += 2.0; +} diff --git a/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp b/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp new file mode 100644 index 000000000..af1c47b32 --- /dev/null +++ 
b/shaders-opencl-no-opt/comp/std140-array-load-composite-construct.comp @@ -0,0 +1,13 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(std140, binding = 0) buffer SSBO +{ + float a[16]; + vec4 b[16]; +}; + +void main() +{ + b[gl_GlobalInvocationID.x] = vec4(a[gl_GlobalInvocationID.x]); +} diff --git a/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp new file mode 100644 index 000000000..47d88912f --- /dev/null +++ b/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp @@ -0,0 +1,100 @@ +#version 310 es +#extension GL_EXT_scalar_block_layout : require + +layout(local_size_x = 1) in; + +struct S0 +{ + vec2 a[1]; + float b; +}; + +struct S1 +{ + vec3 a; + float b; +}; + +struct S2 +{ + vec3 a[1]; + float b; +}; + +struct S3 +{ + vec2 a; + float b; +}; + +struct S4 +{ + vec2 c; +}; + +struct Content +{ + S0 m0s[1]; + S1 m1s[1]; + S2 m2s[1]; + S0 m0; + S1 m1; + S2 m2; + S3 m3; + float m4; + + // glslang seems to miscompile this atm into ArrayStride of 16 even in scalar layout. 
+ //S4 m3s[8]; +}; + +layout(binding = 2, scalar) restrict buffer SSBO2 +{ + float m0; + mat2 m1; + layout(row_major) mat3x2 m2; +} ssbo_scalar2; + +layout(binding = 1, scalar) restrict buffer SSBO1 +{ + Content content; + Content content1[2]; + Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + float array[]; +} ssbo_scalar; + +layout(binding = 0, std140) restrict buffer SSBO0 +{ + Content content; + Content content1[2]; + Content content2; + + layout(column_major) mat2 m0; + layout(column_major) mat2 m1; + layout(column_major) mat2x3 m2[4]; + layout(column_major) mat3x2 m3; + layout(row_major) mat2 m4; + layout(row_major) mat2 m5[9]; + layout(row_major) mat2x3 m6[4][2]; + layout(row_major) mat3x2 m7; + + float array[]; +} ssbo_140; + +void main() +{ + ssbo_scalar.content = ssbo_140.content; + ssbo_scalar.content.m1.a = ssbo_scalar.m2[1] * ssbo_scalar.content.m0.a[0]; // test packed matrix access + ssbo_scalar.m0 = ssbo_scalar2.m1; + ssbo_scalar2.m1 = ssbo_scalar.m4; + ssbo_scalar2.m2 = ssbo_scalar.m3; +} + diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp new file mode 100644 index 000000000..8a0be2269 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp @@ -0,0 +1,25 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +// Reduced test for emulated functionality. 
+ +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); +} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp new file mode 100644 index 000000000..c8172fd95 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp @@ -0,0 +1,211 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +#extension GL_KHR_shader_subgroup_rotate : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void doClusteredRotate() +{ + uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); + bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); +} + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + 
FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // rotate + uint rotated = subgroupRotate(20u, 4u); + bool rotated_bool = subgroupRotate(false, 4u); + doClusteredRotate(); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); 
+ uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 1u); + multiplied = subgroupClusteredMul(multiplied, 1u); + iadded = subgroupClusteredAdd(iadded, 1u); + imultiplied = subgroupClusteredMul(imultiplied, 1u); + lo = subgroupClusteredMin(lo, 1u); + hi = subgroupClusteredMax(hi, 1u); + ulo = subgroupClusteredMin(ulo, 1u); + uhi = subgroupClusteredMax(uhi, 1u); + slo = subgroupClusteredMin(slo, 1u); + shi = subgroupClusteredMax(shi, 1u); + anded = 
subgroupClusteredAnd(anded, 1u); + ored = subgroupClusteredOr(ored, 1u); + xored = subgroupClusteredXor(xored, 1u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); + + added = subgroupClusteredAdd(added, 2u); + multiplied = subgroupClusteredMul(multiplied, 2u); + iadded = subgroupClusteredAdd(iadded, 2u); + imultiplied = subgroupClusteredMul(imultiplied, 2u); + lo = subgroupClusteredMin(lo, 2u); + hi = subgroupClusteredMax(hi, 2u); + ulo = subgroupClusteredMin(ulo, 2u); + uhi = subgroupClusteredMax(uhi, 2u); + slo = subgroupClusteredMin(slo, 2u); + shi = subgroupClusteredMax(shi, 2u); + anded = subgroupClusteredAnd(anded, 2u); + ored = subgroupClusteredOr(ored, 2u); + xored = subgroupClusteredXor(xored, 2u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); + + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + added = subgroupClusteredAdd(added, 16u); + multiplied = subgroupClusteredMul(multiplied, 16u); + iadded = subgroupClusteredAdd(iadded, 16u); + imultiplied = subgroupClusteredMul(imultiplied, 16u); + lo = 
subgroupClusteredMin(lo, 16u); + hi = subgroupClusteredMax(hi, 16u); + ulo = subgroupClusteredMin(ulo, 16u); + uhi = subgroupClusteredMax(uhi, 16u); + slo = subgroupClusteredMin(slo, 16u); + shi = subgroupClusteredMax(shi, 16u); + anded = subgroupClusteredAnd(anded, 16u); + ored = subgroupClusteredOr(ored, 16u); + xored = subgroupClusteredXor(xored, 16u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp new file mode 100644 index 000000000..c8172fd95 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp @@ -0,0 +1,211 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +#extension GL_KHR_shader_subgroup_rotate : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void 
doClusteredRotate() +{ + uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); + bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); +} + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // rotate + uint rotated = subgroupRotate(20u, 4u); + bool rotated_bool = 
subgroupRotate(false, 4u); + doClusteredRotate(); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = 
subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 1u); + multiplied = subgroupClusteredMul(multiplied, 1u); + iadded = subgroupClusteredAdd(iadded, 1u); + imultiplied = subgroupClusteredMul(imultiplied, 1u); + lo = subgroupClusteredMin(lo, 1u); + hi = subgroupClusteredMax(hi, 1u); + ulo = subgroupClusteredMin(ulo, 1u); + uhi = subgroupClusteredMax(uhi, 1u); + slo = subgroupClusteredMin(slo, 1u); + shi = subgroupClusteredMax(shi, 1u); + anded = subgroupClusteredAnd(anded, 1u); + ored = subgroupClusteredOr(ored, 1u); + xored = subgroupClusteredXor(xored, 1u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); + + added = subgroupClusteredAdd(added, 2u); + multiplied = subgroupClusteredMul(multiplied, 2u); + iadded = subgroupClusteredAdd(iadded, 2u); + imultiplied = subgroupClusteredMul(imultiplied, 2u); + lo = subgroupClusteredMin(lo, 2u); + hi = subgroupClusteredMax(hi, 2u); + ulo = subgroupClusteredMin(ulo, 2u); + uhi = subgroupClusteredMax(uhi, 2u); + slo = subgroupClusteredMin(slo, 2u); + shi = subgroupClusteredMax(shi, 2u); + anded = subgroupClusteredAnd(anded, 2u); + ored = subgroupClusteredOr(ored, 2u); + xored = subgroupClusteredXor(xored, 2u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); + + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = 
subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + added = subgroupClusteredAdd(added, 16u); + multiplied = subgroupClusteredMul(multiplied, 16u); + iadded = subgroupClusteredAdd(iadded, 16u); + imultiplied = subgroupClusteredMul(imultiplied, 16u); + lo = subgroupClusteredMin(lo, 16u); + hi = subgroupClusteredMax(hi, 16u); + ulo = subgroupClusteredMin(ulo, 16u); + uhi = subgroupClusteredMax(uhi, 16u); + slo = subgroupClusteredMin(slo, 16u); + shi = subgroupClusteredMax(shi, 16u); + anded = subgroupClusteredAnd(anded, 16u); + ored = subgroupClusteredOr(ored, 16u); + xored = subgroupClusteredXor(xored, 16u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp new file mode 100644 index 000000000..c8172fd95 --- /dev/null +++ 
b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp @@ -0,0 +1,211 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require +#extension GL_KHR_shader_subgroup_clustered : require +#extension GL_KHR_shader_subgroup_quad : require +#extension GL_KHR_shader_subgroup_rotate : require +layout(local_size_x = 1) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void doClusteredRotate() +{ + uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); + bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); +} + +void main() +{ + // basic + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + bool elected = subgroupElect(); + + // ballot + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint 
exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + // shuffle + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); + + // shuffle relative + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + // rotate + uint rotated = subgroupRotate(20u, 4u); + bool rotated_bool = subgroupRotate(false, 4u); + doClusteredRotate(); + + // vote + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + bool has_equal = subgroupAllEqual(0); + has_equal = subgroupAllEqual(true); + has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + has_equal = subgroupAllEqual(bvec4(true, true, false, true)); + + // arithmetic + vec4 added = subgroupAdd(vec4(20.0)); + ivec4 iadded = subgroupAdd(ivec4(20)); + vec4 multiplied = subgroupMul(vec4(20.0)); + ivec4 imultiplied = subgroupMul(ivec4(20)); + vec4 lo = subgroupMin(vec4(20.0)); + vec4 hi = subgroupMax(vec4(20.0)); + ivec4 slo = subgroupMin(ivec4(20)); + ivec4 shi = subgroupMax(ivec4(20)); + uvec4 ulo = subgroupMin(uvec4(20)); + uvec4 uhi = subgroupMax(uvec4(20)); + uvec4 anded = subgroupAnd(ballot_value); + uvec4 ored = subgroupOr(ballot_value); + uvec4 xored = subgroupXor(ballot_value); + bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); + bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); + bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); + + added = subgroupInclusiveAdd(added); + iadded = subgroupInclusiveAdd(iadded); + multiplied = subgroupInclusiveMul(multiplied); + imultiplied = subgroupInclusiveMul(imultiplied); + //lo = subgroupInclusiveMin(lo); // FIXME: 
Unsupported by Metal + //hi = subgroupInclusiveMax(hi); + //slo = subgroupInclusiveMin(slo); + //shi = subgroupInclusiveMax(shi); + //ulo = subgroupInclusiveMin(ulo); + //uhi = subgroupInclusiveMax(uhi); + //anded = subgroupInclusiveAnd(anded); + //ored = subgroupInclusiveOr(ored); + //xored = subgroupInclusiveXor(ored); + //added = subgroupExclusiveAdd(lo); + + added = subgroupExclusiveAdd(multiplied); + multiplied = subgroupExclusiveMul(multiplied); + iadded = subgroupExclusiveAdd(imultiplied); + imultiplied = subgroupExclusiveMul(imultiplied); + //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal + //hi = subgroupExclusiveMax(hi); + //ulo = subgroupExclusiveMin(ulo); + //uhi = subgroupExclusiveMax(uhi); + //slo = subgroupExclusiveMin(slo); + //shi = subgroupExclusiveMax(shi); + //anded = subgroupExclusiveAnd(anded); + //ored = subgroupExclusiveOr(ored); + //xored = subgroupExclusiveXor(ored); + + // clustered + added = subgroupClusteredAdd(added, 1u); + multiplied = subgroupClusteredMul(multiplied, 1u); + iadded = subgroupClusteredAdd(iadded, 1u); + imultiplied = subgroupClusteredMul(imultiplied, 1u); + lo = subgroupClusteredMin(lo, 1u); + hi = subgroupClusteredMax(hi, 1u); + ulo = subgroupClusteredMin(ulo, 1u); + uhi = subgroupClusteredMax(uhi, 1u); + slo = subgroupClusteredMin(slo, 1u); + shi = subgroupClusteredMax(shi, 1u); + anded = subgroupClusteredAnd(anded, 1u); + ored = subgroupClusteredOr(ored, 1u); + xored = subgroupClusteredXor(xored, 1u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); + + added = subgroupClusteredAdd(added, 2u); + multiplied = subgroupClusteredMul(multiplied, 2u); + iadded = subgroupClusteredAdd(iadded, 2u); + imultiplied = subgroupClusteredMul(imultiplied, 2u); + lo = subgroupClusteredMin(lo, 2u); + hi = subgroupClusteredMax(hi, 2u); + ulo = subgroupClusteredMin(ulo, 2u); + uhi = 
subgroupClusteredMax(uhi, 2u); + slo = subgroupClusteredMin(slo, 2u); + shi = subgroupClusteredMax(shi, 2u); + anded = subgroupClusteredAnd(anded, 2u); + ored = subgroupClusteredOr(ored, 2u); + xored = subgroupClusteredXor(xored, 2u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); + + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = subgroupClusteredMax(shi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + added = subgroupClusteredAdd(added, 16u); + multiplied = subgroupClusteredMul(multiplied, 16u); + iadded = subgroupClusteredAdd(iadded, 16u); + imultiplied = subgroupClusteredMul(imultiplied, 16u); + lo = subgroupClusteredMin(lo, 16u); + hi = subgroupClusteredMax(hi, 16u); + ulo = subgroupClusteredMin(ulo, 16u); + uhi = subgroupClusteredMax(uhi, 16u); + slo = subgroupClusteredMin(slo, 16u); + shi = subgroupClusteredMax(shi, 16u); + anded = subgroupClusteredAnd(anded, 16u); + ored = subgroupClusteredOr(ored, 16u); + xored = subgroupClusteredXor(xored, 16u); + + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); + + // quad + vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); + 
bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); + vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); + bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); + vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); + bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); + vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); + bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); +} diff --git a/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp b/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp new file mode 100644 index 000000000..4f9e82f37 --- /dev/null +++ b/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp16.fp16.comp @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_spirv_intrinsics : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +layout(local_size_x = 4) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + float v[4]; + float16_t f16[4]; +}; + +// SignedZeroInfNanPreserve 16 +spirv_execution_mode(capabilities = [4466], extensions = ["SPV_KHR_float_controls"], 4461, 16); + +void main () +{ + v[gl_LocalInvocationIndex] = cos(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += sin(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += tan(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += acos(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += asin(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += atan(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += exp(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += exp2(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += log(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += log2(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += sqrt(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += inversesqrt(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += 
pow(v[gl_LocalInvocationIndex], 4.0); + + f16[gl_LocalInvocationIndex] = cos(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] += sin(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] += cosh(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] += sinh(f16[gl_LocalInvocationIndex]); +} diff --git a/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp b/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp new file mode 100644 index 000000000..8995457c9 --- /dev/null +++ b/shaders-opencl-no-opt/comp/trancendental-float-controls-1-fp32.fp16.comp @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_spirv_intrinsics : require +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require +layout(local_size_x = 4) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + float v[4]; + float16_t f16[4]; +}; + +// SignedZeroInfNanPreserve 32 +spirv_execution_mode(capabilities = [4466], extensions = ["SPV_KHR_float_controls"], 4461, 32); + +void main () +{ + v[gl_LocalInvocationIndex] = cos(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += sin(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += tan(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += acos(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += asin(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += atan(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += exp(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += exp2(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += log(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += log2(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += sqrt(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += inversesqrt(v[gl_LocalInvocationIndex]); + v[gl_LocalInvocationIndex] += pow(v[gl_LocalInvocationIndex], 4.0); + + f16[gl_LocalInvocationIndex] = cos(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] 
+= sin(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] += cosh(f16[gl_LocalInvocationIndex]); + f16[gl_LocalInvocationIndex] += sinh(f16[gl_LocalInvocationIndex]); +} diff --git a/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp b/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp new file mode 100644 index 000000000..3f526942c --- /dev/null +++ b/shaders-opencl-no-opt/comp/transposed-temporary-expression-2.comp @@ -0,0 +1,24 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + layout(row_major) mat4x3 A; + layout(row_major) mat4x3 B; + layout(row_major) mat4x3 C; + vec4 D; + float w0; + float w1; +}; + +void main() +{ + mat4x3 Anew; + mat4x3 Bnew; + do + { + Anew = A * w0; + Bnew = B * w1; + } while(false); + D = vec4((Anew + Bnew) * D, 1.0); +} diff --git a/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp b/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp new file mode 100644 index 000000000..ab56bd039 --- /dev/null +++ b/shaders-opencl-no-opt/comp/transposed-temporary-expression.comp @@ -0,0 +1,17 @@ +#version 450 +layout(local_size_x = 64) in; + +layout(set = 0, binding = 0) buffer SSBO +{ + layout(row_major) mat4x3 A; + layout(row_major) mat4x3 B; + layout(row_major) mat4x3 C; + vec4 D; + float w0; + float w1; +}; + +void main() +{ + D = vec4((A * w0 + B * w1) * D, 1.0); +} diff --git a/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp b/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp new file mode 100644 index 000000000..c3e0922a1 --- /dev/null +++ b/shaders-opencl-no-opt/comp/trivial-select-cast-vector.comp @@ -0,0 +1,14 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + vec3 a; + vec3 b; +}; + +void main() +{ + bvec3 c = lessThan(b, vec3(1.0)); + a = mix(vec3(1, 0, 0), vec3(0, 0, 1), c); +} diff --git a/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp 
b/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp new file mode 100644 index 000000000..5ffcc3f3a --- /dev/null +++ b/shaders-opencl-no-opt/comp/trivial-select-matrix.spv14.comp @@ -0,0 +1,16 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(set = 0, binding = 0) buffer A +{ + mat3 a; + float b; +}; + +void main() +{ + // Scalar to Matrix + bool c = b < 1.0; + a = c ? mat3(vec3(1), vec3(1), vec3(1)) : mat3(vec3(0), vec3(0), vec3(0)); + a = c ? mat3(1) : mat3(0); +} diff --git a/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp b/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp new file mode 100644 index 000000000..d29e08005 --- /dev/null +++ b/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.invalid.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x_id = 0, local_size_y_id = 2, local_size_z_id = 3) in; + +layout(constant_id = 1) const int A = 2; +float D[A * gl_WorkGroupSize.x]; +float E[A * gl_WorkGroupSize.z]; + +layout(set = 0, binding = 0) buffer SSBO +{ + int I; + float V; +}; + +void main () +{ + for (int i = 0; i < A * gl_WorkGroupSize.x; i++) + D[i] = 1.0 + float(i + gl_WorkGroupSize.y); + for (int i = 0; i < A * gl_WorkGroupSize.z; i++) + D[i] = 1.0 + float(i + gl_WorkGroupSize.y); + V = D[I] + D[I ^ 1] + E[I]; +} diff --git a/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp b/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp new file mode 100644 index 000000000..d29e08005 --- /dev/null +++ b/shaders-opencl-no-opt/comp/workgroup-size-spec-constant-array.spv16.invalid.comp @@ -0,0 +1,21 @@ +#version 450 +layout(local_size_x_id = 0, local_size_y_id = 2, local_size_z_id = 3) in; + +layout(constant_id = 1) const int A = 2; +float D[A * gl_WorkGroupSize.x]; +float E[A * gl_WorkGroupSize.z]; + +layout(set = 0, binding = 0) buffer SSBO +{ + int I; + float V; +}; + +void main () +{ + for (int i = 0; i < A * 
gl_WorkGroupSize.x; i++) + D[i] = 1.0 + float(i + gl_WorkGroupSize.y); + for (int i = 0; i < A * gl_WorkGroupSize.z; i++) + D[i] = 1.0 + float(i + gl_WorkGroupSize.y); + V = D[I] + D[I ^ 1] + E[I]; +} diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 3aaaa6150..b366ec9c6 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -1137,9 +1137,10 @@ const char *CompilerOpenCL::to_restrict(uint32_t id, bool space) else flags = get_decoration_bitset(id); - return flags.get(DecorationRestrict) || flags.get(DecorationRestrictPointerEXT) ? - (space ? "__restrict " : "__restrict") : - ""; + // Only check DecorationRestrict here. DecorationRestrictPointerEXT is handled by + // flags_to_qualifiers_glsl in the GLSL base (emits "restrict " prefix), so we + // don't duplicate it as "__restrict" after the pointer star. + return flags.get(DecorationRestrict) ? (space ? "__restrict " : "__restrict") : ""; } string CompilerOpenCL::type_to_glsl(const SPIRType &type, uint32_t id, bool member) @@ -1822,6 +1823,26 @@ std::string CompilerOpenCL::constant_expression(const SPIRConstant &c, bool insi return CompilerGLSL::constant_expression(c, inside_block_like_struct_scope, inside_struct_scope); } +std::string CompilerOpenCL::to_initializer_expression(const SPIRVariable &var) +{ + // OpenCL C does not support initializing arrays from non-constant expressions + // (e.g., `float a[5] = ssbo->b;` is not valid C). + // For array variables with non-constant initializers, emit zero init `{ 0 }` and + // schedule element-by-element copy after the declaration. + auto &type = get_variable_data_type(var); + if (is_array(type) && var.initializer) + { + // Check if the initializer is a constant — those are fine as-is. + if (ir.ids[var.initializer].get_type() != TypeConstant) + { + // Queue the initializer for post-declaration element-by-element copy. 
+ pending_array_copies.push_back({ var.self, var.initializer }); + return "{ 0 }"; + } + } + return CompilerGLSL::to_initializer_expression(var); +} + // OpenCL C requires cast syntax for vector construction: (float4)(1.0, 2.0, 3.0, 4.0) // The GLSL base emits: float4(1.0, 2.0, 3.0, 4.0) which is invalid in OpenCL C. std::string CompilerOpenCL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) @@ -1831,15 +1852,17 @@ std::string CompilerOpenCL::constant_expression_vector(const SPIRConstant &c, ui auto type = get(c.constant_type); type.columns = 1; - if (type.vecsize > 1) + // The base class emits GLSL constructor-style casts: typename(args). + // OpenCL C requires C-style casts: (typename)(args). + // This applies to both vector types (e.g. float4(x)) and scalar casts + // (e.g. int(0x80000000), long(0x8000000000000000ul), uchar(0)). + auto scalar_type = type; + scalar_type.vecsize = 1; + auto type_name = (type.vecsize > 1) ? type_to_glsl(type) : type_to_glsl(scalar_type); + if (!type_name.empty() && res.size() > type_name.size() + 1 && res.substr(0, type_name.size()) == type_name && + res[type_name.size()] == '(') { - // The base class emits: typename(args). OpenCL needs: (typename)(args). 
- auto type_name = type_to_glsl(type); - if (res.size() > type_name.size() + 1 && res.substr(0, type_name.size()) == type_name && - res[type_name.size()] == '(') - { - res = "(" + type_name + ")(" + res.substr(type_name.size() + 1); - } + res = "(" + type_name + ")(" + res.substr(type_name.size() + 1); } return res; @@ -2413,23 +2436,41 @@ void CompilerOpenCL::emit_glsl_op(uint32_t result_type, uint32_t result_id, uint emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); break; case GLSLstd450SMin: - emit_binary_func_op(result_type, result_id, args[0], args[1], "min"); + { + auto int_type = to_signed_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_binary_func_op_cast(result_type, result_id, args[0], args[1], "min", int_type, false); break; + } case GLSLstd450SMax: - emit_binary_func_op(result_type, result_id, args[0], args[1], "max"); + { + auto int_type = to_signed_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_binary_func_op_cast(result_type, result_id, args[0], args[1], "max", int_type, false); break; + } case GLSLstd450UMin: - emit_binary_func_op(result_type, result_id, args[0], args[1], "min"); + { + auto uint_type = to_unsigned_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_binary_func_op_cast(result_type, result_id, args[0], args[1], "min", uint_type, false); break; + } case GLSLstd450UMax: - emit_binary_func_op(result_type, result_id, args[0], args[1], "max"); + { + auto uint_type = to_unsigned_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_binary_func_op_cast(result_type, result_id, args[0], args[1], "max", uint_type, false); break; + } case GLSLstd450SClamp: - emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + { + auto int_type = to_signed_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_trinary_func_op_cast(result_type, result_id, args[0], 
args[1], args[2], "clamp", int_type); break; + } case GLSLstd450UClamp: - emit_trinary_func_op(result_type, result_id, args[0], args[1], args[2], "clamp"); + { + auto uint_type = to_unsigned_basetype(get_integer_width_for_glsl_instruction(glsl_op, args, count)); + emit_trinary_func_op_cast(result_type, result_id, args[0], args[1], args[2], "clamp", uint_type); break; + } case GLSLstd450FMix: case GLSLstd450IMix: @@ -2544,7 +2585,8 @@ std::string CompilerOpenCL::to_member_reference(uint32_t base, const SPIRType &t // so is_pointer() above is false — we only reach here with actual pointers. // Note: StorageClassWorkgroup is excluded because __local variables are emitted // as value types in OpenCL C, so member access uses '.'. - if (sc == StorageClassStorageBuffer || sc == StorageClassCrossWorkgroup) + if (sc == StorageClassStorageBuffer || sc == StorageClassCrossWorkgroup || + sc == StorageClassPhysicalStorageBuffer) { return join("->", to_member_name(type, index)); } @@ -3200,7 +3242,9 @@ bool CompilerOpenCL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t lhs = to_expression(lhs_id); auto rhs_expr = to_expression(rhs_id); - auto &type = expression_type(rhs_id); + auto &raw_type = expression_type(rhs_id); + // If the RHS is a pointer (e.g., from OpLoad source), use the pointee type. + auto &type = is_pointer(raw_type) ? get_pointee_type(raw_type) : raw_type; // Get the array size if (!is_array(type) || type.array.empty()) @@ -3975,6 +4019,35 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } + // BDA pointer casts: emit C-style casts instead of GLSL constructor-style. 
+ case OpConvertUToPtr: + { + auto &type = get(ops[0]); + auto &in_type = expression_type(ops[2]); + auto ptr_type_str = type_to_glsl(type); + string expr; + if (in_type.vecsize > 1) + expr = join("((", ptr_type_str, ")as_ulong(", to_expression(ops[2]), "))"); + else + expr = join("((", ptr_type_str, ")(", to_expression(ops[2]), "))"); + emit_op(ops[0], ops[1], std::move(expr), should_forward(ops[2])); + inherit_expression_dependencies(ops[1], ops[2]); + break; + } + + case OpConvertPtrToU: + { + auto &type = get(ops[0]); + string expr; + if (type.vecsize > 1) + expr = join("as_", type_to_glsl(type), "((ulong)(", to_expression(ops[2]), "))"); + else + expr = join("(", type_to_glsl(type), ")(", to_expression(ops[2]), ")"); + emit_op(ops[0], ops[1], std::move(expr), should_forward(ops[2])); + inherit_expression_dependencies(ops[1], ops[2]); + break; + } + case OpBitcast: { auto &out_type = get(ops[0]); diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 82e551be9..0303e55d9 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -122,6 +122,7 @@ class CompilerOpenCL : public CompilerGLSL StorageClass rhs_storage) override; std::string constant_expression(const SPIRConstant &c, bool inside_block_like_struct_scope = false, bool inside_struct_scope = false) override; + std::string to_initializer_expression(const SPIRVariable &var) override; std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; std::string to_atomic_ptr_expression(uint32_t id) override; @@ -156,6 +157,10 @@ class CompilerOpenCL : public CompilerGLSL // These are C values (not pointers), so subsequent member accesses must use '.' not '->'. std::unordered_set subscripted_deref_exprs; + // Pending array copies from to_initializer_expression: { var_id, initializer_id } + // These are emitted as element-by-element copies after the variable declaration. 
+ SmallVector> pending_array_copies; + // Set when packHalf2x16/unpackHalf2x16 polyfill helpers are needed. bool needs_half_pack_polyfill = false; bool needs_half_unpack_polyfill = false; From d1371b35571e5545a4c5ee425d830c08b8f1f66e Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Sat, 14 Mar 2026 14:00:52 -0700 Subject: [PATCH 08/16] OpenCL: Fixes for legal C code --- ...e-load-store-short-vector.invalid.asm.comp | 14 +- ...ray-copy-physical-layout-mismatch.asm.comp | 37 +++ ...-physical-layout-mismatch.invalid.asm.comp | 20 -- ...m.comp => bitcast-fp16-fp32.fp16.asm.comp} | 4 +- ... block-like-array-type-construct.asm.comp} | 10 +- ...vice-array-load-temporary.asm.invalid.comp | 6 +- ...porary.force-native-array.asm.invalid.comp | 6 +- ...constant-array-load-store.asm.invalid.comp | 24 +- ...-store.force-native-array.asm.invalid.comp | 24 +- ...shared-inner-array-of-struct-copy.asm.comp | 53 ++++ ...nner-array-of-struct-copy.invalid.asm.comp | 38 --- ...asm.comp => storage-buffer-basic.asm.comp} | 2 +- ...ar-alias-ptr-access-chain.asm.invalid.comp | 2 +- ...tier-1.device-argument-buffer.invalid.comp | 23 ++ .../comp/array-copy-threadgroup-memory.comp | 42 +++ ...array-copy-threadgroup-memory.invalid.comp | 20 -- .../shaders-opencl-no-opt/comp/basic.comp | 36 +++ .../comp/basic.invalid.comp | 0 ...-atomics.invalid.comp => bda-atomics.comp} | 2 +- ...p => bda-load-std140-arrayed-pointer.comp} | 2 +- ...omp => bda-restrict-pointer-variable.comp} | 2 +- ...omp => extract-atomics-from-function.comp} | 2 +- ...p => implicit-integer-promotion.fp16.comp} | 48 ++-- ...nvalid.comp => int16min-literal.fp16.comp} | 2 +- ...lid.comp => read-only-coherent-image.comp} | 2 +- ...tier-1.device-argument-buffer.invalid.comp | 0 ...e-load-store-short-vector.invalid.asm.comp | 14 +- ...ay-copy-physical-layout-mismatch.asm.comp} | 0 ...m.comp => bitcast-fp16-fp32.fp16.asm.comp} | 0 ... 
block-like-array-type-construct.asm.comp} | 0 ...hared-inner-array-of-struct-copy.asm.comp} | 0 ...asm.comp => storage-buffer-basic.asm.comp} | 0 ...omp => array-copy-threadgroup-memory.comp} | 0 .../comp/{basic.invalid.comp => basic.comp} | 0 ...-atomics.invalid.comp => bda-atomics.comp} | 0 ...p => bda-load-std140-arrayed-pointer.comp} | 0 ...omp => bda-restrict-pointer-variable.comp} | 0 ...omp => extract-atomics-from-function.comp} | 0 ...p => implicit-integer-promotion.fp16.comp} | 0 ...nvalid.comp => int16min-literal.fp16.comp} | 0 ...lid.comp => read-only-coherent-image.comp} | 0 ...tier-1.device-argument-buffer.invalid.comp | 13 - spirv_glsl.cpp | 18 +- spirv_glsl.hpp | 3 + spirv_opencl.cpp | 245 ++++++++++++++---- spirv_opencl.hpp | 5 +- 46 files changed, 499 insertions(+), 220 deletions(-) create mode 100644 reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp delete mode 100644 reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp rename reference/shaders-opencl-no-opt/asm/comp/{bitcast-fp16-fp32.fp16.invalid.asm.comp => bitcast-fp16-fp32.fp16.asm.comp} (79%) rename reference/shaders-opencl-no-opt/asm/comp/{block-like-array-type-construct.invalid.asm.comp => block-like-array-type-construct.asm.comp} (83%) create mode 100644 reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp delete mode 100644 reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp rename reference/shaders-opencl-no-opt/asm/comp/{storage-buffer-basic.invalid.asm.comp => storage-buffer-basic.asm.comp} (93%) create mode 100644 reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/basic.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/basic.invalid.comp rename 
reference/shaders-opencl-no-opt/comp/{bda-atomics.invalid.comp => bda-atomics.comp} (90%) rename reference/shaders-opencl-no-opt/comp/{bda-load-std140-arrayed-pointer.invalid.comp => bda-load-std140-arrayed-pointer.comp} (94%) rename reference/shaders-opencl-no-opt/comp/{bda-restrict-pointer-variable.invalid.comp => bda-restrict-pointer-variable.comp} (81%) rename reference/shaders-opencl-no-opt/comp/{extract-atomics-from-function.invalid.comp => extract-atomics-from-function.comp} (100%) rename reference/shaders-opencl-no-opt/comp/{implicit-integer-promotion.fp16.invalid.comp => implicit-integer-promotion.fp16.comp} (55%) rename reference/shaders-opencl-no-opt/comp/{int16min-literal.fp16.invalid.comp => int16min-literal.fp16.comp} (92%) rename reference/shaders-opencl-no-opt/comp/{read-only-coherent-image.invalid.comp => read-only-coherent-image.comp} (81%) delete mode 100644 reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp rename shaders-opencl-no-opt/asm/comp/{array-copy-physical-layout-mismatch.invalid.asm.comp => array-copy-physical-layout-mismatch.asm.comp} (100%) rename shaders-opencl-no-opt/asm/comp/{bitcast-fp16-fp32.fp16.invalid.asm.comp => bitcast-fp16-fp32.fp16.asm.comp} (100%) rename shaders-opencl-no-opt/asm/comp/{block-like-array-type-construct.invalid.asm.comp => block-like-array-type-construct.asm.comp} (100%) rename shaders-opencl-no-opt/asm/comp/{groupshared-inner-array-of-struct-copy.invalid.asm.comp => groupshared-inner-array-of-struct-copy.asm.comp} (100%) rename shaders-opencl-no-opt/asm/comp/{storage-buffer-basic.invalid.asm.comp => storage-buffer-basic.asm.comp} (100%) rename shaders-opencl-no-opt/comp/{array-copy-threadgroup-memory.invalid.comp => array-copy-threadgroup-memory.comp} (100%) rename shaders-opencl-no-opt/comp/{basic.invalid.comp => basic.comp} (100%) rename shaders-opencl-no-opt/comp/{bda-atomics.invalid.comp => bda-atomics.comp} (100%) rename 
shaders-opencl-no-opt/comp/{bda-load-std140-arrayed-pointer.invalid.comp => bda-load-std140-arrayed-pointer.comp} (100%) rename shaders-opencl-no-opt/comp/{bda-restrict-pointer-variable.invalid.comp => bda-restrict-pointer-variable.comp} (100%) rename shaders-opencl-no-opt/comp/{extract-atomics-from-function.invalid.comp => extract-atomics-from-function.comp} (100%) rename shaders-opencl-no-opt/comp/{implicit-integer-promotion.fp16.invalid.comp => implicit-integer-promotion.fp16.comp} (100%) rename shaders-opencl-no-opt/comp/{int16min-literal.fp16.invalid.comp => int16min-literal.fp16.comp} (100%) rename shaders-opencl-no-opt/comp/{read-only-coherent-image.invalid.comp => read-only-coherent-image.comp} (100%) delete mode 100644 shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp diff --git a/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp index f7d65805e..4ab76df67 100644 --- a/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/opt/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -1,18 +1,18 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -void _main( uint3* id) +void _main( uint3* id_2) { - float2 loaded = read_imagef(TargetTexture, as_int2((*id).xy)).xy; - float2 storeTemp = loaded + (float2)(1.0f); - write_imagef(TargetTexture, as_int2((*id).xy + (uint2)(1u)), (float4)(storeTemp)); + float2 loaded_1 = read_imagef(TargetTexture, as_int2((*id_2).xy)).xy; + float2 storeTemp_1 = loaded_1 + (float2)(1.0f); + write_imagef(TargetTexture, as_int2((*id_2).xy + (uint2)(1u)), (float4)(storeTemp_1)); } __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(write_only image2d_t TargetTexture) { - uint3 id_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); - uint3 param = id_1; - 
_main(¶m); + uint3 id_1_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); + uint3 param_1 = id_1_1; + _main(¶m_1); } diff --git a/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp new file mode 100644 index 000000000..2cb996c09 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float b[5]; + float c[5]; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global SSBO* _7) +{ + float a[5]; + a[0] = _7->b[0]; + a[1] = _7->b[1]; + a[2] = _7->b[2]; + a[3] = _7->b[3]; + a[4] = _7->b[4]; + a[0] = _7->b[0]; + a[1] = _7->b[1]; + a[2] = _7->b[2]; + a[3] = _7->b[3]; + a[4] = _7->b[4]; + _7->b[0] = a[0]; + _7->b[1] = a[1]; + _7->b[2] = a[2]; + _7->b[3] = a[3]; + _7->b[4] = a[4]; + _7->c[0] = a[0]; + _7->c[1] = a[1]; + _7->c[2] = a[2]; + _7->c[3] = a[3]; + _7->c[4] = a[4]; +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp deleted file mode 100644 index 45aaa65c5..000000000 --- a/reference/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp +++ /dev/null @@ -1,20 +0,0 @@ -// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) - - -struct SSBO -{ - float b[5]; - float c[5]; -}; - -typedef struct SSBO SSBO; - -__attribute__((reqd_work_group_size(1, 1, 1))) -__kernel void comp_main(__global SSBO* _7) -{ - float a[5] = _7->b; - a = _7->b; - _7->b = a; - _7->c = a; -} - diff --git a/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.asm.comp similarity index 
79% rename from reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp rename to reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.asm.comp index b02f295d9..3b38a72fe 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.asm.comp @@ -16,7 +16,7 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global SSBO* _6) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - _6->b = uintBitsToFloat(packFloat2x16(_6->a)); - _6->d = unpackFloat2x16(floatBitsToUint(_6->c)); + _6->b = as_float(_6->a); + _6->d = as_half2(_6->c); } diff --git a/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.asm.comp similarity index 83% rename from reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp rename to reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.asm.comp index 421377b4d..3744e3e11 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.asm.comp @@ -27,11 +27,17 @@ __kernel void comp_main(__global SSBO* _8) float foo[4]; float foo2[4]; foo[0] = 1.0f; - foo = { 1.0f, 2.0f, 3.0f, 4.0f }; + foo[0] = 1.0f; + foo[1] = 2.0f; + foo[2] = 3.0f; + foo[3] = 4.0f; foo[1] = 2.0f; foo[2] = 3.0f; foo[3] = 4.0f; - foo2 = foo; + foo2[0] = foo[0]; + foo2[1] = foo[1]; + foo2[2] = foo[2]; + foo2[3] = foo[3]; _12 _41 = (_12){ { foo[0], foo[1], foo[2], foo[3] }, { foo2[0], foo2[1], foo2[2], foo2[3] } }; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp index 
e4387c0c9..38ec524f8 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.asm.invalid.comp @@ -23,7 +23,9 @@ __kernel void comp_main(__global Block* ssbo) uint2 _27[2]; _27[0] = ssbo[0u]._m1[0]; _27[1] = ssbo[0u]._m1[1]; - ssbo[0u]._m0 = _27; - ssbo[0u]._m0 = _27; + ssbo[0u]._m0[0] = _27[0]; + ssbo[0u]._m0[1] = _27[1]; + ssbo[0u]._m0[0] = _27[0]; + ssbo[0u]._m0[1] = _27[1]; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp index e4387c0c9..38ec524f8 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/device-array-load-temporary.force-native-array.asm.invalid.comp @@ -23,7 +23,9 @@ __kernel void comp_main(__global Block* ssbo) uint2 _27[2]; _27[0] = ssbo[0u]._m1[0]; _27[1] = ssbo[0u]._m1[1]; - ssbo[0u]._m0 = _27; - ssbo[0u]._m0 = _27; + ssbo[0u]._m0[0] = _27[0]; + ssbo[0u]._m0[1] = _27[1]; + ssbo[0u]._m0[0] = _27[0]; + ssbo[0u]._m0[1] = _27[1]; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp index f8a5f221b..48f285a9a 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.asm.invalid.comp @@ -20,14 +20,22 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global Block* ssbo, SSBO ubo) { __local uint2 _18[2]; - ssbo[0u]._m0 = ssbo[0u]._m1; - ssbo[0u]._m0 = ubo._m0[0u]._m1; + ssbo[0u]._m0[0] = ssbo[0u]._m1[0]; + ssbo[0u]._m0[1] = ssbo[0u]._m1[1]; + ssbo[0u]._m0[0] = 
ubo._m0[0u]._m1[0]; + ssbo[0u]._m0[1] = ubo._m0[0u]._m1[1]; uint2 _23[2]; - ssbo[0u]._m0 = _23; - ssbo[0u]._m0 = _18; - _18 = ssbo[0u]._m1; - _23 = ssbo[0u]._m1; - _18 = ubo._m0[0u]._m1; - _23 = ubo._m0[0u]._m1; + ssbo[0u]._m0[0] = _23[0]; + ssbo[0u]._m0[1] = _23[1]; + ssbo[0u]._m0[0] = _18[0]; + ssbo[0u]._m0[1] = _18[1]; + _18[0] = ssbo[0u]._m1[0]; + _18[1] = ssbo[0u]._m1[1]; + _23[0] = ssbo[0u]._m1[0]; + _23[1] = ssbo[0u]._m1[1]; + _18[0] = ubo._m0[0u]._m1[0]; + _18[1] = ubo._m0[0u]._m1[1]; + _23[0] = ubo._m0[0u]._m1[0]; + _23[1] = ubo._m0[0u]._m1[1]; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp index f8a5f221b..48f285a9a 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/device-constant-array-load-store.force-native-array.asm.invalid.comp @@ -20,14 +20,22 @@ __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global Block* ssbo, SSBO ubo) { __local uint2 _18[2]; - ssbo[0u]._m0 = ssbo[0u]._m1; - ssbo[0u]._m0 = ubo._m0[0u]._m1; + ssbo[0u]._m0[0] = ssbo[0u]._m1[0]; + ssbo[0u]._m0[1] = ssbo[0u]._m1[1]; + ssbo[0u]._m0[0] = ubo._m0[0u]._m1[0]; + ssbo[0u]._m0[1] = ubo._m0[0u]._m1[1]; uint2 _23[2]; - ssbo[0u]._m0 = _23; - ssbo[0u]._m0 = _18; - _18 = ssbo[0u]._m1; - _23 = ssbo[0u]._m1; - _18 = ubo._m0[0u]._m1; - _23 = ubo._m0[0u]._m1; + ssbo[0u]._m0[0] = _23[0]; + ssbo[0u]._m0[1] = _23[1]; + ssbo[0u]._m0[0] = _18[0]; + ssbo[0u]._m0[1] = _18[1]; + _18[0] = ssbo[0u]._m1[0]; + _18[1] = ssbo[0u]._m1[1]; + _23[0] = ssbo[0u]._m1[0]; + _23[1] = ssbo[0u]._m1[1]; + _18[0] = ubo._m0[0u]._m1[0]; + _18[1] = ubo._m0[0u]._m1[1]; + _23[0] = ubo._m0[0u]._m1[0]; + _23[1] = ubo._m0[0u]._m1[1]; } diff --git 
a/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp new file mode 100644 index 000000000..d0c505a04 --- /dev/null +++ b/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp @@ -0,0 +1,53 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Data +{ + float3 sourceData[16]; +}; + +typedef struct Data Data; + +__attribute__((reqd_work_group_size(8, 8, 1))) +__kernel void comp_main(read_only image2d_t g_inputTexture, write_only image2d_t g_output) +{ + __local Data g_data[64]; + uint _49; + _49 = 0u; + for (; _49 < 4u; _49++) + { + for (uint _56 = 0u; _56 < 4u; ) + { + g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[(_49 * 4u) + _56] = read_imagef(g_inputTexture, (as_int3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))) + (int3)(as_int(_56), as_int(_49), 0)).xy).xyz; + _56++; + continue; + } + } + float3 _45[16]; + _45[0] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[0]; + _45[1] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[1]; + _45[2] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[2]; + _45[3] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[3]; + _45[4] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[4]; + _45[5] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[5]; + _45[6] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[6]; + _45[7] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[7]; + _45[8] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[8]; + _45[9] = 
g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[9]; + _45[10] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[10]; + _45[11] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[11]; + _45[12] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[12]; + _45[13] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[13]; + _45[14] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[14]; + _45[15] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[15]; + uint _77; + _77 = 0u; + for (int _80 = 0; _80 < 16; ) + { + _77 |= convert_uint(clamp(dot(_45[_80], (float3)(-1.0f)), 0.0f, 1.0f)); + _80++; + continue; + } + write_imageui(g_output, as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy), (uint4)(_77)); +} + diff --git a/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp deleted file mode 100644 index cedb4d5d6..000000000 --- a/reference/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp +++ /dev/null @@ -1,38 +0,0 @@ -// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) - - -struct Data -{ - float3 sourceData[16]; -}; - -typedef struct Data Data; - -__attribute__((reqd_work_group_size(8, 8, 1))) -__kernel void comp_main(read_only image2d_t g_inputTexture, write_only image2d_t g_output) -{ - __local Data g_data[64]; - uint _49; - _49 = 0u; - for (; _49 < 4u; _49++) - { - for (uint _56 = 0u; _56 < 4u; ) - { - int3 _65 = as_int3(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2)))) + (int3)(as_int(_56), as_int(_49), 0); - g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData[(_49 * 
4u) + _56] = texelFetch(g_inputTexture, _65.xy, _65.z).xyz; - _56++; - continue; - } - } - float3 _45[16] = g_data[((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x].sourceData; - uint _77; - _77 = 0u; - for (int _80 = 0; _80 < 16; ) - { - _77 |= convert_uint(clamp(dot(_45[_80], (float3)(-1.0f)), 0.0f, 1.0f)); - _80++; - continue; - } - write_imageui(g_output, as_int2(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).xy), (uint4)(_77)); -} - diff --git a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 93% rename from reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp rename to reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.asm.comp index 952585e08..4641626ae 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.asm.comp @@ -21,7 +21,7 @@ constant uint3 spvWorkgroupSize = (uint3)(_15, 2u, _17); __attribute__((reqd_work_group_size(1, 2, 3))) __kernel void comp_main(__global float* _20, __global float* _21) { - uint3 _19 = spvWorkgroupSize = spvWorkgroupSize; + uint3 _19 = spvWorkgroupSize; _20[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x] = _21[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x] + _20[((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))).x]; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp index c6c583b90..2f3e9217c 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/workgroup-uint-to-uchar-alias-ptr-access-chain.asm.invalid.comp @@ 
-43,7 +43,7 @@ __kernel void comp_main(__global uint4* _25, _8 _29) { __local uint _5[256]; __local uchar _10[1024]; - uint3 _20 = spvWorkgroupSize = spvWorkgroupSize; + uint3 _20 = spvWorkgroupSize; bool _40 = _29._m0._m0 != 0u; if (_40) { diff --git a/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp b/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp index e69de29bb..497606109 100644 --- a/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp @@ -0,0 +1,23 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct D +{ + float data_d[1]; +}; + +typedef struct D D; + +struct A +{ + float data_a[1]; +}; + +typedef struct A A; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global float* d, __global const float* a) +{ + d[0][0] = a[0][0]; +} + diff --git a/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp b/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 000000000..3ceaf1450 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,42 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +__attribute__((reqd_work_group_size(8, 1, 1))) +__kernel void comp_main() +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + __local float shared_group[8][8]; + __local float shared_group_alt[8][8]; + float blob[8]; + for (int i = 0; i < 8; i++) + { + blob[i] = convert_float(i); + } + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + 
get_local_id(1) * get_local_size(0) + get_local_id(0)))][0] = blob[0]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][1] = blob[1]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][2] = blob[2]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][3] = blob[3]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][4] = blob[4]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][5] = blob[5]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][6] = blob[6]; + shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][7] = blob[7]; + barrier(CLK_LOCAL_MEM_FENCE); + float copied_blob[8]; + copied_blob[0] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][0]; + copied_blob[1] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][1]; + copied_blob[2] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][2]; + copied_blob[3] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][3]; + copied_blob[4] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + 
get_local_id(0))) ^ 1u][4]; + copied_blob[5] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][5]; + copied_blob[6] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][6]; + copied_blob[7] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u][7]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][0] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][0]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][1] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][1]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][2] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][2]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][3] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][3]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][4] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][4]; + shared_group_alt[((uint)(get_local_id(2) * 
get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][5] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][5]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][6] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][6]; + shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][7] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))][7]; +} + diff --git a/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp b/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp deleted file mode 100644 index 33bdcbea5..000000000 --- a/reference/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp +++ /dev/null @@ -1,20 +0,0 @@ -// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) - - -__attribute__((reqd_work_group_size(8, 1, 1))) -__kernel void comp_main() -{ - uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - __local float shared_group[8][8]; - __local float shared_group_alt[8][8]; - float blob[8]; - for (int i = 0; i < 8; i++) - { - blob[i] = convert_float(i); - } - shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))] = blob; - barrier(CLK_LOCAL_MEM_FENCE); - float copied_blob[8] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0))) ^ 1u]; - shared_group_alt[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + 
get_local_id(1) * get_local_size(0) + get_local_id(0)))] = shared_group[((uint)(get_local_id(2) * get_local_size(0) * get_local_size(1) + get_local_id(1) * get_local_size(0) + get_local_id(0)))]; -} - diff --git a/reference/shaders-opencl-no-opt/comp/basic.comp b/reference/shaders-opencl-no-opt/comp/basic.comp new file mode 100644 index 000000000..35091de9a --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/basic.comp @@ -0,0 +1,36 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct Baz +{ + int e; + int f; +}; + +typedef struct Baz Baz; + +struct Foo +{ + int a; + int b; +}; + +typedef struct Foo Foo; + +struct Bar +{ + int c; + int d; +}; + +typedef struct Bar Bar; + +__attribute__((reqd_work_group_size(3, 3, 2))) +__kernel void comp_main(__global Baz* baz, Foo _34, Bar _40) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint3 coords = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))); + baz[(coords.x + coords.y) + coords.z].e = _34.a + _40.c; + baz[(coords.x + coords.y) + coords.z].f = _34.b * _40.d; +} + diff --git a/reference/shaders-opencl-no-opt/comp/basic.invalid.comp b/reference/shaders-opencl-no-opt/comp/basic.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp b/reference/shaders-opencl-no-opt/comp/bda-atomics.comp similarity index 90% rename from reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp rename to reference/shaders-opencl-no-opt/comp/bda-atomics.comp index 5b9a08f9b..a3250fb5d 100644 --- a/reference/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/bda-atomics.comp @@ -33,7 +33,7 @@ struct SSBO typedef struct SSBO SSBO; __attribute__((reqd_work_group_size(1, 1, 1))) -__kernel void comp_main(Registers _12, UBO _26, __global const __global Ptr** _35) +__kernel void comp_main(Registers _12, UBO _26, __global const 
ulong* _35) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); uint _23 = atomic_add(&((__global Ptr*)(_12.ptr))->i, 10u); diff --git a/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp b/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.comp similarity index 94% rename from reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp rename to reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.comp index cb3ef0331..381ef3667 100644 --- a/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.comp @@ -12,7 +12,7 @@ typedef struct SSBO SSBO; struct UBO { - __global SSBO* ptrs[2]; + ulong ptrs[2]; }; typedef struct UBO UBO; diff --git a/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp b/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.comp similarity index 81% rename from reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp rename to reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.comp index 5d82fb4d5..098f8fd5c 100644 --- a/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.comp @@ -20,7 +20,7 @@ typedef struct Registers Registers; __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(Registers _14) { - restrict __global Ref* __restrict ref = ((__global Ref*)(_14.foo)); + __global Ref* __restrict ref = ((__global Ref*)(_14.foo)); ref->v = (float4)(1.0f); } diff --git a/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp b/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.comp similarity index 100% rename from 
reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp rename to reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.comp index 016fbcd95..f1da5e503 100644 --- a/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/extract-atomics-from-function.comp @@ -1,7 +1,6 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#define var (*var_ptr) #define var (*var_ptr) void testAdd(__local uint* var_ptr) { @@ -66,6 +65,7 @@ void testStore(__local uint* var_ptr) #undef var +#define var (*var_ptr) void foo(__local uint* var_ptr) { testAdd(&var); diff --git a/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.comp similarity index 55% rename from reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp rename to reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.comp index 7ee5a5f89..5191bd38c 100644 --- a/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.comp @@ -16,34 +16,34 @@ typedef struct BUF0 BUF0; void test_u16(__global BUF0* _24) { - _24->f16 += as_half(ushort(_24->u16[0u] + _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] - _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] * _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] / _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] % _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] << _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] >> _24->u16[1u])); - _24->f16 += as_half(ushort(~_24->u16[0u])); - _24->f16 += as_half(ushort(-_24->u16[0u])); - _24->f16 += as_half(ushort(_24->u16[0u] ^ _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] & _24->u16[1u])); - _24->f16 += as_half(ushort(_24->u16[0u] | 
_24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] + _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] - _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] * _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] / _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] % _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] << _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] >> _24->u16[1u])); + _24->f16 += as_half((ushort)(~_24->u16[0u])); + _24->f16 += as_half((ushort)(-_24->u16[0u])); + _24->f16 += as_half((ushort)(_24->u16[0u] ^ _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] & _24->u16[1u])); + _24->f16 += as_half((ushort)(_24->u16[0u] | _24->u16[1u])); } void test_i16(__global BUF0* _24) { - _24->f16 += as_half(short(_24->i16[0u] + _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] - _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] * _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] / _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] % _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] << _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] >> _24->i16[1u])); - _24->f16 += as_half(short(~_24->i16[0u])); - _24->f16 += as_half(short(-_24->i16[0u])); - _24->f16 += as_half(short(_24->i16[0u] ^ _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] & _24->i16[1u])); - _24->f16 += as_half(short(_24->i16[0u] | _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] + _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] - _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] * _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] / _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] % _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] << _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] >> _24->i16[1u])); + _24->f16 += as_half((short)(~_24->i16[0u])); + _24->f16 += as_half((short)(-_24->i16[0u])); + _24->f16 += 
as_half((short)(_24->i16[0u] ^ _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] & _24->i16[1u])); + _24->f16 += as_half((short)(_24->i16[0u] | _24->i16[1u])); } void test_u16s(__global BUF0* _24) diff --git a/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.comp similarity index 92% rename from reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp rename to reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.comp index 4d3324d9e..ee9e25b09 100644 --- a/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/int16min-literal.fp16.comp @@ -21,7 +21,7 @@ __kernel void comp_main(UBO _12, __global half* _24) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); short v = as_short(_12.b); - v = short(v ^ (-32768s)); + v = (short)(v ^ (short)(-32768)); _24[0] = as_half(v); } diff --git a/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp b/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.comp similarity index 81% rename from reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp rename to reference/shaders-opencl-no-opt/comp/read-only-coherent-image.comp index 0e8f8174f..4fe8fbb98 100644 --- a/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/read-only-coherent-image.comp @@ -9,7 +9,7 @@ struct SSBO typedef struct SSBO SSBO; __attribute__((reqd_work_group_size(1, 1, 1))) -__kernel void comp_main(__global uint* _9, write_only image2d_t img) +__kernel void comp_main(__global uint* _9, read_only image2d_t img) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); _9[0] = read_imageui(img, (int2)(10)).x; diff --git 
a/reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp b/reference/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp b/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp index f7d65805e..4ab76df67 100644 --- a/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp +++ b/reference/shaders-opencl/asm/comp/image-load-store-short-vector.invalid.asm.comp @@ -1,18 +1,18 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -void _main( uint3* id) +void _main( uint3* id_2) { - float2 loaded = read_imagef(TargetTexture, as_int2((*id).xy)).xy; - float2 storeTemp = loaded + (float2)(1.0f); - write_imagef(TargetTexture, as_int2((*id).xy + (uint2)(1u)), (float4)(storeTemp)); + float2 loaded_1 = read_imagef(TargetTexture, as_int2((*id_2).xy)).xy; + float2 storeTemp_1 = loaded_1 + (float2)(1.0f); + write_imagef(TargetTexture, as_int2((*id_2).xy + (uint2)(1u)), (float4)(storeTemp_1)); } __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(write_only image2d_t TargetTexture) { - uint3 id_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); - uint3 param = id_1; - _main(¶m); + uint3 id_1_1 = ((uint3)(get_group_id(0), get_group_id(1), get_group_id(2))); + uint3 param_1 = id_1_1; + _main(¶m_1); } diff --git a/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/array-copy-physical-layout-mismatch.asm.comp diff --git 
a/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/bitcast-fp16-fp32.fp16.asm.comp diff --git a/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/block-like-array-type-construct.asm.comp diff --git a/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/groupshared-inner-array-of-struct-copy.asm.comp diff --git a/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/storage-buffer-basic.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/storage-buffer-basic.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/storage-buffer-basic.asm.comp diff --git a/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp b/shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp similarity index 100% rename from shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.invalid.comp rename to shaders-opencl-no-opt/comp/array-copy-threadgroup-memory.comp diff --git a/shaders-opencl-no-opt/comp/basic.invalid.comp b/shaders-opencl-no-opt/comp/basic.comp similarity index 100% rename from shaders-opencl-no-opt/comp/basic.invalid.comp rename to shaders-opencl-no-opt/comp/basic.comp diff --git 
a/shaders-opencl-no-opt/comp/bda-atomics.invalid.comp b/shaders-opencl-no-opt/comp/bda-atomics.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bda-atomics.invalid.comp rename to shaders-opencl-no-opt/comp/bda-atomics.comp diff --git a/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp b/shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.invalid.comp rename to shaders-opencl-no-opt/comp/bda-load-std140-arrayed-pointer.comp diff --git a/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp b/shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.invalid.comp rename to shaders-opencl-no-opt/comp/bda-restrict-pointer-variable.comp diff --git a/shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp b/shaders-opencl-no-opt/comp/extract-atomics-from-function.comp similarity index 100% rename from shaders-opencl-no-opt/comp/extract-atomics-from-function.invalid.comp rename to shaders-opencl-no-opt/comp/extract-atomics-from-function.comp diff --git a/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp b/shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.comp similarity index 100% rename from shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.invalid.comp rename to shaders-opencl-no-opt/comp/implicit-integer-promotion.fp16.comp diff --git a/shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp b/shaders-opencl-no-opt/comp/int16min-literal.fp16.comp similarity index 100% rename from shaders-opencl-no-opt/comp/int16min-literal.fp16.invalid.comp rename to shaders-opencl-no-opt/comp/int16min-literal.fp16.comp diff --git a/shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp b/shaders-opencl-no-opt/comp/read-only-coherent-image.comp similarity index 100% 
rename from shaders-opencl-no-opt/comp/read-only-coherent-image.invalid.comp rename to shaders-opencl-no-opt/comp/read-only-coherent-image.comp diff --git a/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp b/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp deleted file mode 100644 index 0db56342c..000000000 --- a/shaders-opencl-no-opt/comp/simple-bindless-ssbo.argument.argument-tier-1.device-argument-buffer.invalid.comp +++ /dev/null @@ -1,13 +0,0 @@ -#version 450 -#extension GL_EXT_nonuniform_qualifier : require -layout(local_size_x = 1) in; - -layout(set = 0, binding = 0) buffer SSBO -{ - vec4 a; -} ssbos[]; - -void main() -{ - ssbos[gl_WorkGroupID.x].a += 2.0; -} diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 66ab8c560..e782463c7 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -7122,8 +7122,12 @@ void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, { auto &type = get(result_type); bool forward = should_forward(op0); - emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), - forward); + if (backend.c_style_casts) + emit_op(result_type, result_id, + join("(", type_to_glsl(type), ")(", op, to_enclosed_unpacked_expression(op0), ")"), forward); + else + emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), + forward); inherit_expression_dependencies(result_id, op0); } @@ -7291,7 +7295,10 @@ void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, if (implicit_integer_promotion) { // Simple value cast. 
- expr = join(type_to_glsl(out_type), '(', bitop, ')'); + if (backend.c_style_casts) + expr = join("(", type_to_glsl(out_type), ")(", bitop, ")"); + else + expr = join(type_to_glsl(out_type), '(', bitop, ')'); } else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { @@ -13779,7 +13786,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (implicit_integer_promotion) { - expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); + if (backend.c_style_casts) + expr = join("(", type_to_glsl(get(result_type)), ")(", expr, ")"); + else + expr = join(type_to_glsl(get(result_type)), '(', expr, ')'); } else if (out_type.basetype != int_type) { diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 70c93fcd4..a6b564461 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -667,6 +667,9 @@ class CompilerGLSL : public Compiler bool workgroup_size_is_hidden = false; bool requires_relaxed_precision_analysis = false; bool implicit_c_integer_promotion_rules = false; + // When true, emit C-style casts "(type)(expr)" instead of GLSL constructor-style "type(expr)" + // for value casts (e.g., implicit integer promotion). + bool c_style_casts = false; bool supports_spec_constant_array_size = true; // When non-empty, matrix column access uses this member name instead of raw array indexing. // e.g., "columns" -> m.columns[i] instead of m[i]. 
diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index b366ec9c6..cb2be10aa 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -78,8 +78,8 @@ string CompilerOpenCL::compile() backend.float_literal_suffix = true; backend.double_literal_suffix = true; backend.uint32_t_literal_suffix = true; - backend.int16_t_literal_suffix = "s"; - backend.uint16_t_literal_suffix = "us"; + backend.int16_t_literal_suffix = ""; + backend.uint16_t_literal_suffix = ""; backend.basic_int_type = "int"; backend.basic_uint_type = "uint"; backend.basic_int8_type = "char"; @@ -111,12 +111,14 @@ string CompilerOpenCL::compile() backend.array_is_value_type_in_buffer_blocks = false; backend.support_pointer_to_pointer = true; backend.implicit_c_integer_promotion_rules = true; + backend.c_style_casts = true; backend.supports_spec_constant_array_size = false; backend.matrix_column_accessor = "columns"; fixup_anonymous_struct_names(); fixup_type_alias(); replace_illegal_names(); + fixup_image_load_store_access(); build_function_control_flow_graphs_and_analyze(); update_active_builtins(); analyze_image_and_sampler_usage(); @@ -216,6 +218,21 @@ const char *CompilerOpenCL::to_storage_qualifiers_glsl(const SPIRVariable &) void CompilerOpenCL::compute_kernel_resources() { + // OpenCL C uses __restrict (after *) instead of GLSL's restrict (before type). + // Convert DecorationRestrictPointerEXT → DecorationRestrict so the base class + // flags_to_qualifiers_glsl does not emit "restrict " prefix, and our to_restrict + // emits "__restrict" after the pointer star instead. + ir.for_each_typed_id( + [&](uint32_t, SPIRVariable &var) + { + auto &flags = get_decoration_bitset(var.self); + if (flags.get(DecorationRestrictPointerEXT)) + { + unset_decoration(var.self, DecorationRestrictPointerEXT); + set_decoration(var.self, DecorationRestrict); + } + }); + // Collect all SSBOs/BufferBlocks that get flattened to __global T* kernel parameters. 
flattened_buffer_vars.clear(); flattened_var_type_decl.clear(); @@ -234,7 +251,11 @@ void CompilerOpenCL::compute_kernel_resources() if (type.basetype == SPIRType::Struct && type.member_types.size() == 1) { const auto &member0_type = get(type.member_types.front()); - subtype = type_to_glsl(member0_type); + // BDA pointer members are stored as ulong in structs. + if (is_pointer(member0_type) && member0_type.storage == StorageClassPhysicalStorageBuffer) + subtype = "ulong"; + else + subtype = type_to_glsl(member0_type); } else { @@ -951,10 +972,9 @@ void CompilerOpenCL::emit_entry_point_declarations() if (var.storage == StorageClassPrivate && !is_hidden_variable(var, true)) { add_local_variable_name(var.self); - string initializer; - if (var.initializer) - initializer = join(" = ", to_expression(var.initializer)); - statement(CompilerGLSL::variable_decl(var), initializer, ";"); + // CompilerGLSL::variable_decl(var) already includes the initializer + // expression (via to_initializer_expression), so no extra initializer needed. + statement(CompilerGLSL::variable_decl(var), ";"); } } @@ -1137,9 +1157,8 @@ const char *CompilerOpenCL::to_restrict(uint32_t id, bool space) else flags = get_decoration_bitset(id); - // Only check DecorationRestrict here. DecorationRestrictPointerEXT is handled by - // flags_to_qualifiers_glsl in the GLSL base (emits "restrict " prefix), so we - // don't duplicate it as "__restrict" after the pointer star. + // DecorationRestrictPointerEXT is converted to DecorationRestrict in + // compute_kernel_resources(), so only check DecorationRestrict here. return flags.get(DecorationRestrict) ? (space ? "__restrict " : "__restrict") : ""; } @@ -1829,17 +1848,9 @@ std::string CompilerOpenCL::to_initializer_expression(const SPIRVariable &var) // (e.g., `float a[5] = ssbo->b;` is not valid C). // For array variables with non-constant initializers, emit zero init `{ 0 }` and // schedule element-by-element copy after the declaration. 
- auto &type = get_variable_data_type(var); - if (is_array(type) && var.initializer) - { - // Check if the initializer is a constant — those are fine as-is. - if (ir.ids[var.initializer].get_type() != TypeConstant) - { - // Queue the initializer for post-declaration element-by-element copy. - pending_array_copies.push_back({ var.self, var.initializer }); - return "{ 0 }"; - } - } + // SPIR-V spec only allows constant initializers on OpVariable, so array + // initializers are always constants and valid as-is in OpenCL C. + // Non-constant array copies are handled by emit_store_statement (OpStore). return CompilerGLSL::to_initializer_expression(var); } @@ -2513,6 +2524,26 @@ std::string CompilerOpenCL::bitcast_glsl_op(const SPIRType &out_type, const SPIR return "as_" + out_name; } +bool CompilerOpenCL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) +{ + auto &output_type = get(result_type); + auto &input_type = expression_type(op0); + string expr; + + // float → half2 bitcast: as_half2(expr) + if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) + expr = join("as_half2(", to_unpacked_expression(op0), ")"); + // half2 → float bitcast: as_float(expr) + else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && + input_type.vecsize == 2) + expr = join("as_float(", to_unpacked_expression(op0), ")"); + else + return false; + + emit_op(result_type, id, expr, should_forward(op0)); + return true; +} + // Task #7: In OpenCL C, atomic functions take a pointer argument. // Access chain expressions (access_chain = true) may be C lvalues (e.g. ssbo->u32) → need &. // But single-member flattened SSBOs emit the raw pointer itself (e.g. 
_48 is __global uint*) @@ -2728,6 +2759,12 @@ std::string CompilerOpenCL::entry_point_args(bool append_comma) { // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V const auto &member0_type = this->get(parent_type.member_types.front()); + // If the sole member is a BDA pointer, type_to_glsl would return + // `__global Ptr*` which, wrapped in `__global const X*`, yields + // double `__global` and pointer-to-pointer. Flatten to `ulong` + // instead, matching how emit_struct_member stores BDA pointers. + if (is_pointer(member0_type) && member0_type.storage == StorageClassPhysicalStorageBuffer) + return std::string("ulong"); return this->type_to_glsl(member0_type); } else @@ -2896,6 +2933,24 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r } decl += ")"; + + // Emit #define macros right before the function prototype for workgroup scalar pointer aliasing. + // This must happen here (not in emit_function) because CompilerGLSL::emit_function recursively + // emits callee functions before reaching emit_function_prototype, so #define in emit_function + // would be undone by callee #undef before this function's body is emitted. + auto wg_it = func_workgroup_args.find(func.self); + if (wg_it != func_workgroup_args.end()) + { + for (auto var_id : wg_it->second) + { + if (workgroup_scalar_vars.count(var_id)) + { + auto var_name = to_name(var_id); + statement("#define ", var_name, " (*", var_name, "_ptr)"); + } + } + } + statement(decl); } @@ -2933,33 +2988,24 @@ void CompilerOpenCL::append_global_func_args(const SPIRFunction &func, uint32_t void CompilerOpenCL::emit_function(SPIRFunction &func, const Bitset &return_flags) { - // Emit #define macros before the function for workgroup scalar pointer aliasing. + CompilerGLSL::emit_function(func, return_flags); + + // Emit #undef after the function body. + // The matching #define is emitted in emit_function_prototype. 
auto wg_it = func_workgroup_args.find(func.self); - bool has_defines = false; if (wg_it != func_workgroup_args.end()) { + bool has_defines = false; for (auto var_id : wg_it->second) { if (workgroup_scalar_vars.count(var_id)) { - auto var_name = to_name(var_id); - statement("#define ", var_name, " (*", var_name, "_ptr)"); + statement("#undef ", to_name(var_id)); has_defines = true; } } - } - - CompilerGLSL::emit_function(func, return_flags); - - // Emit #undef after the function. - if (has_defines) - { - for (auto var_id : wg_it->second) - { - if (workgroup_scalar_vars.count(var_id)) - statement("#undef ", to_name(var_id)); - } - statement(""); + if (has_defines) + statement(""); } } @@ -3020,6 +3066,35 @@ void CompilerOpenCL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_ } else { + // Check if storing an array type — C does not allow `array = expr;`. + auto &rhs_type_raw = expression_type(rhs_expression); + auto &rhs_type = is_pointer(rhs_type_raw) ? get_pointee_type(rhs_type_raw) : rhs_type_raw; + if (is_array(rhs_type)) + { + auto *var = maybe_get(lhs_expression); + // For deferred declarations where the RHS is a composite construct + // (not loaded from memory), C99 allows `T arr[N] = { ... };`. + // Let the base class handle that case — it merges decl + init correctly. + auto *rhs_expr_node = maybe_get(rhs_expression); + bool rhs_from_memory = rhs_expr_node && rhs_expr_node->loaded_from; + if (var && var->deferred_declaration && !rhs_from_memory) + { + // Base class will emit `T arr[N] = { ... };` + CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); + return; + } + + // Flush deferred declaration so we don't get "float a[5] = rhs". 
+ if (var && var->deferred_declaration) + { + var->deferred_declaration = false; + statement(variable_decl_function_local(*var), ";"); + } + auto lhs_expr = to_dereferenced_expression(lhs_expression); + emit_array_copy(lhs_expr.c_str(), 0, rhs_expression, StorageClassFunction, StorageClassFunction); + register_write(lhs_expression); + return; + } CompilerGLSL::emit_store_statement(lhs_expression, rhs_expression); } } @@ -3031,19 +3106,20 @@ void CompilerOpenCL::emit_struct_member(const SPIRType &type, uint32_t member_ty // OpenCL C does not use GLSL layout qualifiers or interpolation qualifiers. // PhysicalStorageBuffer pointers in structs must be emitted as ulong since // OpenCL C does not allow pointer types in kernel parameter structs. - if (is_pointer(membertype) && membertype.storage == StorageClassPhysicalStorageBuffer) + // Walk through array dimensions to find the inner element type, so that + // array-of-pointer members (e.g. `Ptr* ptrs[2]`) are also caught. + auto *inner = &membertype; + while (is_array(*inner)) + inner = &get(inner->parent_type); + if (is_pointer(*inner) && inner->storage == StorageClassPhysicalStorageBuffer) { - statement(qualifier, "ulong ", to_member_name(type, index), ";"); + statement(qualifier, "ulong ", to_member_name(type, index), type_to_array_glsl(membertype, 0), ";"); } else if (has_member_decoration(type.self, index, DecorationRowMajor)) { // Row-major matrix: the physical layout has transposed dimensions. // Emit the member with the physical (transposed) type so struct layout matches buffer. - // Walk through array nesting to find the inner matrix type. - const auto *inner = &membertype; - while (is_array(*inner)) - inner = &get(inner->parent_type); - + // `inner` already points to the innermost non-array type from the BDA check above. 
if (inner->columns > 1) { auto phys_type_name = opencl_matrix_type_name(inner->basetype, inner->columns, inner->vecsize); @@ -3262,9 +3338,19 @@ bool CompilerOpenCL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t return true; } - // Emit element-by-element copy - for (uint32_t i = 0; i < array_size; i++) - statement(lhs, "[", i, "] = ", rhs_expr, "[", i, "];"); + // For constant RHS, `to_expression` returns `{ 1.0f, 2.0f, ... }` and + // subscripting that (`{ ... }[0]`) is not valid C. Extract sub-constants. + auto *constant = maybe_get(rhs_id); + if (constant && !constant->subconstants.empty()) + { + for (uint32_t i = 0; i < array_size; i++) + statement(lhs, "[", i, "] = ", to_expression(constant->subconstants[i]), ";"); + } + else + { + for (uint32_t i = 0; i < array_size; i++) + statement(lhs, "[", i, "] = ", rhs_expr, "[", i, "];"); + } return true; } @@ -4164,6 +4250,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) string expr; bool handled = false; + int32_t row_major_mbr_idx = -1; // member index for row-major check, -1 if N/A bool is_subscript_deref = false; // result is a C value (subscripted), not a pointer @@ -4175,6 +4262,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) // ops[5+] = optional sub-member indices expr = join(to_name(base_id), "[", to_expression(ops[4]), "]"); is_subscript_deref = true; + row_major_mbr_idx = 0; // single member, always index 0 // Walk additional sub-member indices using type info. 
if (length >= 6 && struct_type) { @@ -4204,6 +4292,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) auto mbr_name = to_member_name(*struct_type, mbr_idx); expr = join(to_name(base_id), "[", to_expression(ops[3]), "].", mbr_name); is_subscript_deref = true; + row_major_mbr_idx = int32_t(mbr_idx); handled = true; } else if (length == 5 && !is_single_member && struct_type) @@ -4214,6 +4303,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) auto mbr_name = to_member_name(*struct_type, mbr_idx); expr = join(to_name(base_id), "->", mbr_name, "[", to_expression(ops[4]), "]"); is_subscript_deref = true; + row_major_mbr_idx = int32_t(mbr_idx); handled = true; } else if (length == 4 && is_single_member) @@ -4221,6 +4311,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) // Single-member SSBO flattened to T*: accessing the one member gives element 0. expr = join(to_name(base_id), "[0]"); is_subscript_deref = true; + row_major_mbr_idx = 0; // single member, always index 0 handled = true; } else if (length == 4 && !is_single_member && struct_type && !struct_type->array.empty()) @@ -4237,6 +4328,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) auto mbr_name = to_member_name(*struct_type, mbr_idx); expr = join(to_name(base_id), "->", mbr_name); is_subscript_deref = true; // result is a struct value (accessed through ->), use . for children + row_major_mbr_idx = int32_t(mbr_idx); handled = true; } @@ -4250,10 +4342,9 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) subscripted_deref_exprs.insert(result_id); // Propagate row-major transpose flag for matrix members. 
- if (struct_type && length >= 4) + if (struct_type && row_major_mbr_idx >= 0) { - uint32_t mbr_idx = get(ops[3]).scalar(); - if (member_is_non_native_row_major_matrix(*struct_type, mbr_idx)) + if (member_is_non_native_row_major_matrix(*struct_type, uint32_t(row_major_mbr_idx))) e.need_transpose = true; } @@ -4554,6 +4645,46 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) break; } + // OpImageFetch (texelFetch in GLSL) maps to read_image* in OpenCL C, + // same as OpImageRead but may carry a Lod operand (which we ignore + // since OpenCL images don't support LOD on read). + case OpImageFetch: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t image_id = ops[2]; + uint32_t coord_id = ops[3]; + + auto &result_spirtype = get(result_type); + const char *read_func; + switch (result_spirtype.basetype) + { + case SPIRType::UInt: + read_func = "read_imageui"; + break; + case SPIRType::Int: + read_func = "read_imagei"; + break; + default: + read_func = "read_imagef"; + break; + } + + // Convert coordinate to int. + auto coord_type = expression_type(coord_id); + coord_type.basetype = SPIRType::Int; + auto coord_expr = bitcast_expression(coord_type, expression_type(coord_id).basetype, to_expression(coord_id)); + + auto raw_expr = join(read_func, "(", to_expression(image_id), ", ", coord_expr, ")"); + auto swizzled = remap_swizzle(result_spirtype, 4, raw_expr); + + bool forward = should_forward(image_id) && should_forward(coord_id); + emit_op(result_type, result_id, swizzled, forward); + inherit_expression_dependencies(result_id, image_id); + inherit_expression_dependencies(result_id, coord_id); + break; + } + // Task #10: Map image read/write/query ops to OpenCL C equivalents. case OpImageRead: { @@ -4562,6 +4693,18 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) uint32_t image_id = ops[2]; uint32_t coord_id = ops[3]; + // Unset NonReadable so image access qualifier deduction works correctly. 
+ auto *image_var = maybe_get_backing_variable(image_id); + if (image_var) + { + auto &flags = get_decoration_bitset(image_var->self); + if (flags.get(DecorationNonReadable)) + { + unset_decoration(image_var->self, DecorationNonReadable); + force_recompile(); + } + } + auto &img_type = expression_type(image_id); // SubpassData is not supported; fall through to base class. if (img_type.image.dim == DimSubpassData) diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 0303e55d9..94a7c5d35 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -125,6 +125,7 @@ class CompilerOpenCL : public CompilerGLSL std::string to_initializer_expression(const SPIRVariable &var) override; std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; + bool emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) override; std::string to_atomic_ptr_expression(uint32_t id) override; void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count) override; @@ -157,10 +158,6 @@ class CompilerOpenCL : public CompilerGLSL // These are C values (not pointers), so subsequent member accesses must use '.' not '->'. std::unordered_set subscripted_deref_exprs; - // Pending array copies from to_initializer_expression: { var_id, initializer_id } - // These are emitted as element-by-element copies after the variable declaration. - SmallVector> pending_array_copies; - // Set when packHalf2x16/unpackHalf2x16 polyfill helpers are needed. 
bool needs_half_pack_polyfill = false; bool needs_half_unpack_polyfill = false; From 82f29d2abeeb605b1521f920bad846c8f1749bbd Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Sat, 14 Mar 2026 14:42:10 -0700 Subject: [PATCH 09/16] OpenCL: Fixing another test --- ...w-maj-mtx-bypass-transpose.spv14.asm.comp} | 16 +---- .../asm/comp/variable-pointers-2.asm.comp | 2 +- .../comp/variable-pointers-3.invalid.asm.comp | 2 +- ...ariable-pointers-vector-to-scalar.asm.comp | 2 +- ...w-maj-mtx-bypass-transpose.spv14.asm.comp} | 0 spirv_opencl.cpp | 62 +++++++++++++------ spirv_opencl.hpp | 4 ++ 7 files changed, 54 insertions(+), 34 deletions(-) rename reference/shaders-opencl-no-opt/asm/comp/{opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp => opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp} (51%) rename shaders-opencl-no-opt/asm/comp/{opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp => opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp} (100%) diff --git a/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp similarity index 51% rename from reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp rename to reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp index 0f41e332f..0cd1801d4 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp @@ -20,16 +20,6 @@ struct _7 typedef struct _7 _7; -static spvMat4 spvTransposeMat4(spvMat4 m) -{ - spvMat4 r; - r.columns[0] = (float4)(m.columns[0].x, m.columns[1].x, m.columns[2].x, m.columns[3].x); - r.columns[1] = (float4)(m.columns[0].y, m.columns[1].y, m.columns[2].y, m.columns[3].y); - 
r.columns[2] = (float4)(m.columns[0].z, m.columns[1].z, m.columns[2].z, m.columns[3].z); - r.columns[3] = (float4)(m.columns[0].w, m.columns[1].w, m.columns[2].w, m.columns[3].w); - return r; -} - __attribute__((reqd_work_group_size(1, 1, 1))) __kernel void comp_main(__global _6* _2, __global _6* _3, __global uint* _4) { @@ -39,12 +29,12 @@ __kernel void comp_main(__global _6* _2, __global _6* _3, __global uint* _4) bool _40 = &_2->_m2 == &_3->_m2; uint _43 = _39 + 1u; _4[_39] = _40 ? 0u : 1u; - bool _46 = _40 ? &_2->_m2 : &_2->_m3 == _40 ? &_3->_m2 : &_3->_m3; + bool _46 = (_40 ? &_2->_m2 : &_2->_m3) == (_40 ? &_3->_m2 : &_3->_m3); uint _49 = _43 + 1u; _4[_43] = _46 ? 0u : 1u; uint _54 = _49 + 1u; - _4[_49] = (_46 ? &_2->_m2 : &_2->_m3 == &_2->_m0.columns[0u].x) ? 0u : 1u; - uint _56 = (&_2->_m0 == &spvTransposeMat4(_2->_m1)) ? 0u : 1u; + _4[_49] = ((_46 ? &_2->_m2 : &_2->_m3) == &((__global float*)&_2->_m0.columns[0u])[0u]) ? 0u : 1u; + uint _56 = (&_2->_m0 == &_2->_m1) ? 0u : 1u; uint _58 = _54 + 1u; _4[_54] = _56; _4[_58] = _56; diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp index dfbbef692..de0cfa47c 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-2.asm.comp @@ -11,6 +11,6 @@ __kernel void comp_main() ((&((&test)[0u]))[0u])[1u + 2u] = _22; ((&test)[0u])[1u + 2u] = _22; ((&test)[0u])[3u] = _22; - ((&test)[0u])[2u + 1u].x = _21; + ((__local float*)&((&test)[0u])[2u])[0u + 1u] = _21; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp index 99ce6ceff..6c35b4042 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-3.invalid.asm.comp @@ -7,6 
+7,6 @@ __kernel void comp_main() uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); __local float2 test[64]; float _21 = convert_float(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x); - (true ? &((&test)[0u])[2u].x : &((&test)[0u])[2u].x)[1u] = _21; + (true ? &((__local float*)&((&test)[0u])[2u])[0u] : &((__local float*)&((&test)[0u])[2u])[0u])[1u] = _21; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp index 5af73eb76..e925200e9 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-pointers-vector-to-scalar.asm.comp @@ -7,6 +7,6 @@ __kernel void comp_main() uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); __local float2 test[64]; float _21 = convert_float(((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x); - (*(true ? &test[1u] : &test[2u])).y = _21; + ((__local float*)&(*(true ? 
&test[1u] : &test[2u])))[1u] = _21; } diff --git a/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp b/shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.invalid.comp rename to shaders-opencl-no-opt/asm/comp/opptrequal-row-maj-mtx-bypass-transpose.spv14.asm.comp diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index cb2be10aa..ed7cfe633 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -2574,6 +2574,43 @@ std::string CompilerOpenCL::to_atomic_ptr_expression(uint32_t id) return to_expression(id); } +bool CompilerOpenCL::prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, + StorageClass storage, bool &is_packed) +{ + // In OpenCL C, you cannot take the address of a vector component (e.g. &vec.x is invalid). + // Cast the vector expression to a scalar pointer so that element access uses array indexing. + if (storage == StorageClassStorageBuffer || storage == StorageClassWorkgroup) + { + const char *addr_space = storage == StorageClassWorkgroup ? 
"__local" : "__global"; + expr = join("((", addr_space, " ", type_to_glsl(type), "*)&", enclose_expression(expr), ")"); + is_packed = true; + return true; + } + else + return false; +} + +void CompilerOpenCL::emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1); + emit_op(result_type, result_id, join(to_ptr_expression(op0), " ", op, " ", to_ptr_expression(op1)), forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +string CompilerOpenCL::to_ptr_expression(uint32_t id, bool register_expression_read) +{ + auto *e = maybe_get(id); + // If need_transpose is set, bypass the transpose wrapper and use the raw expression, + // since we're taking the address and comparing pointers, not values. + auto expr = + enclose_expression(e && e->need_transpose ? e->expression : to_expression(id, register_expression_read)); + if (!should_dereference(id)) + expr = address_of_expression(expr); + return expr; +} + // Task #3: In OpenCL C, pointer-to-struct member access uses -> instead of . // ptr_chain_is_resolved == false means this is the first member access from the base. 
bool CompilerOpenCL::should_dereference(uint32_t id) @@ -4843,27 +4880,16 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) } case OpPtrEqual: + emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], "=="); + break; + case OpPtrNotEqual: + emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], "!="); + break; + case OpPtrDiff: - { - uint32_t result_type = ops[0]; - uint32_t result_id = ops[1]; - uint32_t op0 = ops[2]; - uint32_t op1 = ops[3]; - const char *op = ""; - if (opcode == OpPtrEqual) - op = "=="; - else if (opcode == OpPtrNotEqual) - op = "!="; - else if (opcode == OpPtrDiff) - op = "-"; - bool forward = should_forward(op0) && should_forward(op1); - emit_op(result_type, result_id, join(to_pointer_expression(op0), " ", op, " ", to_pointer_expression(op1)), - forward); - inherit_expression_dependencies(result_id, op0); - inherit_expression_dependencies(result_id, op1); + emit_binary_ptr_op(ops[0], ops[1], ops[2], ops[3], "-"); break; - } case OpSDot: case OpUDot: diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 94a7c5d35..febc62bc9 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -144,6 +144,10 @@ class CompilerOpenCL : public CompilerGLSL void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; + void emit_binary_ptr_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op); + std::string to_ptr_expression(uint32_t id, bool register_expression_read = true); + bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, StorageClass storage, + bool &is_packed) override; Options opencl_options; From 6c9166411e26105fd6bf9b81a79918e59e29f744 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Mon, 16 Mar 2026 10:44:11 -0700 Subject: [PATCH 10/16] OpenCL: subgroup implementation --- 
...oups-arithmetic.nocompat.vk.subgroups.comp | 47 ++ ...ubgroups-ballot.nocompat.vk.subgroups.comp | 37 ++ ...subgroups-basic.nocompat.vk.subgroups.comp | 57 +++ ...roups-clustered.nocompat.vk.subgroups.comp | 99 ++++ ...ubgroups-rotate.nocompat.vk.subgroups.comp | 24 + ...huffle-relative.nocompat.vk.subgroups.comp | 24 + ...bgroups-shuffle.nocompat.vk.subgroups.comp | 24 + .../subgroups-vote.nocompat.vk.subgroups.comp | 28 ++ ....vk.opencl12.emulate-subgroup.invalid.comp | 0 ...at.vk.subgroup.fixed-subgroup.invalid.comp | 0 ...ubgroups.nocompat.vk.subgroup.invalid.comp | 0 ....nocompat.vk.subgroup.swizzle.invalid.comp | 0 ...oups-arithmetic.nocompat.vk.subgroups.comp | 47 ++ ...ubgroups-ballot.nocompat.vk.subgroups.comp | 36 ++ ...subgroups-basic.nocompat.vk.subgroups.comp | 72 +++ ...roups-clustered.nocompat.vk.subgroups.comp | 97 ++++ ...ubgroups-rotate.nocompat.vk.subgroups.comp | 20 + ...huffle-relative.nocompat.vk.subgroups.comp | 20 + ...bgroups-shuffle.nocompat.vk.subgroups.comp | 19 + .../subgroups-vote.nocompat.vk.subgroups.comp | 24 + ....vk.opencl12.emulate-subgroup.invalid.comp | 25 - ...at.vk.subgroup.fixed-subgroup.invalid.comp | 211 -------- ...ubgroups.nocompat.vk.subgroup.invalid.comp | 211 -------- ....nocompat.vk.subgroup.swizzle.invalid.comp | 211 -------- spirv_opencl.cpp | 470 +++++++++++++++++- spirv_opencl.hpp | 14 + test_shaders.py | 42 +- 27 files changed, 1171 insertions(+), 688 deletions(-) create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp create mode 100644 
reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp delete mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp delete mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp delete mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp delete mode 100644 shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp 
b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..916168719 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp @@ -0,0 +1,47 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable + +struct SSBO +{ + float fdat; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _13) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float fadd_1 = sub_group_reduce_add(_13->fdat); + float fmul_1 = sub_group_non_uniform_reduce_mul(_13->fdat); + int iadd_1 = sub_group_reduce_add(_13->idat); + int imul_1 = sub_group_non_uniform_reduce_mul(_13->idat); + float fmin_v_1 = sub_group_reduce_min(_13->fdat); + float fmax_v_1 = sub_group_reduce_max(_13->fdat); + int smin_v_1 = sub_group_reduce_min(_13->idat); + int smax_v_1 = sub_group_reduce_max(_13->idat); + uint umin_v_1 = sub_group_reduce_min(_13->udat); + uint umax_v_1 = sub_group_reduce_max(_13->udat); + uint anded_1 = sub_group_non_uniform_reduce_and(_13->udat); + uint ored_1 = sub_group_non_uniform_reduce_or(_13->udat); + uint xored_1 = sub_group_non_uniform_reduce_xor(_13->udat); + int4 bval_1 = (uint4)(_13->udat) == (uint4)(42u); + int4 anded_b_1 = (int4)(sub_group_non_uniform_reduce_logical_and(bval_1.x), sub_group_non_uniform_reduce_logical_and(bval_1.y), sub_group_non_uniform_reduce_logical_and(bval_1.z), sub_group_non_uniform_reduce_logical_and(bval_1.w)); + int4 ored_b_1 = (int4)(sub_group_non_uniform_reduce_logical_or(bval_1.x), sub_group_non_uniform_reduce_logical_or(bval_1.y), sub_group_non_uniform_reduce_logical_or(bval_1.z), 
sub_group_non_uniform_reduce_logical_or(bval_1.w)); + int4 xored_b_1 = (int4)(sub_group_non_uniform_reduce_logical_xor(bval_1.x), sub_group_non_uniform_reduce_logical_xor(bval_1.y), sub_group_non_uniform_reduce_logical_xor(bval_1.z), sub_group_non_uniform_reduce_logical_xor(bval_1.w)); + float finc_add_1 = sub_group_scan_inclusive_add(_13->fdat); + float finc_mul_1 = sub_group_non_uniform_scan_inclusive_mul(_13->fdat); + int iinc_add_1 = sub_group_scan_inclusive_add(_13->idat); + int iinc_mul_1 = sub_group_non_uniform_scan_inclusive_mul(_13->idat); + float fexc_add_1 = sub_group_scan_exclusive_add(_13->fdat); + float fexc_mul_1 = sub_group_non_uniform_scan_exclusive_mul(_13->fdat); + int iexc_add_1 = sub_group_scan_exclusive_add(_13->idat); + int iexc_mul_1 = sub_group_non_uniform_scan_exclusive_mul(_13->idat); + _13->fdat = (((((((((((fadd_1 + fmul_1) + fmin_v_1) + fmax_v_1) + finc_add_1) + finc_mul_1) + fexc_add_1) + fexc_mul_1) + convert_float(((((((iadd_1 + imul_1) + smin_v_1) + smax_v_1) + iinc_add_1) + iinc_mul_1) + iexc_add_1) + iexc_mul_1)) + convert_float((((umin_v_1 + umax_v_1) + anded_1) + ored_1) + xored_1)) + (float)(anded_b_1.x)) + (float)(ored_b_1.x)) + (float)(xored_b_1.x); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..c7d53554c --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp @@ -0,0 +1,37 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable + +struct SSBO +{ + float FragColor; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global float* _9) +{ + uint3 spvWorkgroupSize = 
(uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _9[0] = convert_float4(get_sub_group_eq_mask()).x; + _9[0] = convert_float4(get_sub_group_ge_mask()).x; + _9[0] = convert_float4(get_sub_group_gt_mask()).x; + _9[0] = convert_float4(get_sub_group_le_mask()).x; + _9[0] = convert_float4(get_sub_group_lt_mask()).x; + float4 broadcasted_1 = (float4)(sub_group_broadcast((float4)(10.0f).x, 8u), sub_group_broadcast((float4)(10.0f).y, 8u), sub_group_broadcast((float4)(10.0f).z, 8u), sub_group_broadcast((float4)(10.0f).w, 8u)); + int2 broadcasted_bool_1 = (int2)(sub_group_broadcast((int2)(true).x, 8u), sub_group_broadcast((int2)(true).y, 8u)); + float3 first_1 = (float3)(sub_group_broadcast_first((float3)(20.0f).x), sub_group_broadcast_first((float3)(20.0f).y), sub_group_broadcast_first((float3)(20.0f).z)); + int4 first_bool_1 = (int4)(sub_group_broadcast_first((int4)(false).x), sub_group_broadcast_first((int4)(false).y), sub_group_broadcast_first((int4)(false).z), sub_group_broadcast_first((int4)(false).w)); + uint4 ballot_value_1 = sub_group_ballot(true); + bool inverse_ballot_value_1 = sub_group_inverse_ballot(ballot_value_1); + bool bit_extracted_1 = sub_group_ballot_bit_extract((uint4)(10u), 8u); + uint bit_count_1 = sub_group_ballot_bit_count(ballot_value_1); + uint inclusive_bit_count_1 = sub_group_ballot_inclusive_scan(ballot_value_1); + uint exclusive_bit_count_1 = sub_group_ballot_exclusive_scan(ballot_value_1); + uint lsb_1 = sub_group_ballot_find_lsb(ballot_value_1); + uint msb_1 = sub_group_ballot_find_msb(ballot_value_1); + _9[0] = (((((((((broadcasted_1.x + (float)(broadcasted_bool_1.x)) + first_1.x) + (float)(first_bool_1.x)) + (float)(inverse_ballot_value_1)) + (float)(bit_extracted_1)) + convert_float(bit_count_1)) + convert_float(inclusive_bit_count_1)) + convert_float(exclusive_bit_count_1)) + convert_float(lsb_1)) + convert_float(msb_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp 
b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..e4921be88 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp @@ -0,0 +1,57 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +struct SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _11) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + _11->FragColor = convert_float(get_num_sub_groups()); + _11->FragColor = convert_float(get_sub_group_id()); + _11->FragColor = convert_float(get_sub_group_size()); + _11->FragColor = convert_float(get_sub_group_local_id()); + sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + bool has_all = sub_group_all(true); + bool has_any = sub_group_any(true); + uint broadcasted = sub_group_broadcast(42u, 0u); + float fadd = sub_group_reduce_add(_11->FragColor); + int iadd = sub_group_reduce_add(_11->idat); + float fmin = sub_group_reduce_min(_11->FragColor); + float fmax = sub_group_reduce_max(_11->FragColor); + int smin = sub_group_reduce_min(_11->idat); + int smax = sub_group_reduce_max(_11->idat); + uint umin = sub_group_reduce_min(_11->udat); + uint umax = sub_group_reduce_max(_11->udat); + float finc_add = sub_group_scan_inclusive_add(_11->FragColor); + float finc_min = sub_group_scan_inclusive_min(_11->FragColor); + float finc_max = sub_group_scan_inclusive_max(_11->FragColor); + int iinc_add = sub_group_scan_inclusive_add(_11->idat); + int iinc_min = 
sub_group_scan_inclusive_min(_11->idat); + int iinc_max = sub_group_scan_inclusive_max(_11->idat); + uint uinc_min = sub_group_scan_inclusive_min(_11->udat); + uint uinc_max = sub_group_scan_inclusive_max(_11->udat); + float fexc_add = sub_group_scan_exclusive_add(_11->FragColor); + float fexc_min = sub_group_scan_exclusive_min(_11->FragColor); + float fexc_max = sub_group_scan_exclusive_max(_11->FragColor); + int iexc_add = sub_group_scan_exclusive_add(_11->idat); + int iexc_min = sub_group_scan_exclusive_min(_11->idat); + int iexc_max = sub_group_scan_exclusive_max(_11->idat); + uint uexc_min = sub_group_scan_exclusive_min(_11->udat); + uint uexc_max = sub_group_scan_exclusive_max(_11->udat); + _11->FragColor = (((((((((((fadd + fmin) + fmax) + finc_add) + finc_min) + finc_max) + fexc_add) + fexc_min) + fexc_max) + convert_float((((((((iadd + smin) + smax) + iinc_add) + iinc_min) + iinc_max) + iexc_add) + iexc_min) + iexc_max)) + convert_float((((((umin + umax) + uinc_min) + uinc_max) + uexc_min) + uexc_max) + broadcasted)) + (float)(has_all)) + (float)(has_any); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..10a67ecce --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp @@ -0,0 +1,99 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable + +struct SSBO +{ + float fdat; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _14) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + float4 fv_1 = (float4)(_14->fdat); + 
int4 iv_1 = (int4)(_14->idat); + uint4 uv_1 = (uint4)(_14->udat); + float4 added_1 = (float4)(sub_group_clustered_reduce_add(fv_1.x, 1u), sub_group_clustered_reduce_add(fv_1.y, 1u), sub_group_clustered_reduce_add(fv_1.z, 1u), sub_group_clustered_reduce_add(fv_1.w, 1u)); + float4 multiplied_1 = (float4)(sub_group_clustered_reduce_mul(fv_1.x, 1u), sub_group_clustered_reduce_mul(fv_1.y, 1u), sub_group_clustered_reduce_mul(fv_1.z, 1u), sub_group_clustered_reduce_mul(fv_1.w, 1u)); + int4 iadded_1 = (int4)(sub_group_clustered_reduce_add(iv_1.x, 1u), sub_group_clustered_reduce_add(iv_1.y, 1u), sub_group_clustered_reduce_add(iv_1.z, 1u), sub_group_clustered_reduce_add(iv_1.w, 1u)); + int4 imultiplied_1 = (int4)(sub_group_clustered_reduce_mul(iv_1.x, 1u), sub_group_clustered_reduce_mul(iv_1.y, 1u), sub_group_clustered_reduce_mul(iv_1.z, 1u), sub_group_clustered_reduce_mul(iv_1.w, 1u)); + float4 lo_1 = (float4)(sub_group_clustered_reduce_min(fv_1.x, 1u), sub_group_clustered_reduce_min(fv_1.y, 1u), sub_group_clustered_reduce_min(fv_1.z, 1u), sub_group_clustered_reduce_min(fv_1.w, 1u)); + float4 hi_1 = (float4)(sub_group_clustered_reduce_max(fv_1.x, 1u), sub_group_clustered_reduce_max(fv_1.y, 1u), sub_group_clustered_reduce_max(fv_1.z, 1u), sub_group_clustered_reduce_max(fv_1.w, 1u)); + int4 slo_1 = (int4)(sub_group_clustered_reduce_min(iv_1.x, 1u), sub_group_clustered_reduce_min(iv_1.y, 1u), sub_group_clustered_reduce_min(iv_1.z, 1u), sub_group_clustered_reduce_min(iv_1.w, 1u)); + int4 shi_1 = (int4)(sub_group_clustered_reduce_max(iv_1.x, 1u), sub_group_clustered_reduce_max(iv_1.y, 1u), sub_group_clustered_reduce_max(iv_1.z, 1u), sub_group_clustered_reduce_max(iv_1.w, 1u)); + uint4 ulo_1 = (uint4)(sub_group_clustered_reduce_min(uv_1.x, 1u), sub_group_clustered_reduce_min(uv_1.y, 1u), sub_group_clustered_reduce_min(uv_1.z, 1u), sub_group_clustered_reduce_min(uv_1.w, 1u)); + uint4 uhi_1 = (uint4)(sub_group_clustered_reduce_max(uv_1.x, 1u), sub_group_clustered_reduce_max(uv_1.y, 
1u), sub_group_clustered_reduce_max(uv_1.z, 1u), sub_group_clustered_reduce_max(uv_1.w, 1u)); + uint4 anded_1 = (uint4)(sub_group_clustered_reduce_and(uv_1.x, 1u), sub_group_clustered_reduce_and(uv_1.y, 1u), sub_group_clustered_reduce_and(uv_1.z, 1u), sub_group_clustered_reduce_and(uv_1.w, 1u)); + uint4 ored_1 = (uint4)(sub_group_clustered_reduce_or(uv_1.x, 1u), sub_group_clustered_reduce_or(uv_1.y, 1u), sub_group_clustered_reduce_or(uv_1.z, 1u), sub_group_clustered_reduce_or(uv_1.w, 1u)); + uint4 xored_1 = (uint4)(sub_group_clustered_reduce_xor(uv_1.x, 1u), sub_group_clustered_reduce_xor(uv_1.y, 1u), sub_group_clustered_reduce_xor(uv_1.z, 1u), sub_group_clustered_reduce_xor(uv_1.w, 1u)); + int4 bval_1 = uv_1 == (uint4)(42u); + int4 anded_b_1 = (int4)(sub_group_clustered_reduce_logical_and(bval_1.x, 1u), sub_group_clustered_reduce_logical_and(bval_1.y, 1u), sub_group_clustered_reduce_logical_and(bval_1.z, 1u), sub_group_clustered_reduce_logical_and(bval_1.w, 1u)); + int4 ored_b_1 = (int4)(sub_group_clustered_reduce_logical_or(bval_1.x, 1u), sub_group_clustered_reduce_logical_or(bval_1.y, 1u), sub_group_clustered_reduce_logical_or(bval_1.z, 1u), sub_group_clustered_reduce_logical_or(bval_1.w, 1u)); + int4 xored_b_1 = (int4)(sub_group_clustered_reduce_logical_xor(bval_1.x, 1u), sub_group_clustered_reduce_logical_xor(bval_1.y, 1u), sub_group_clustered_reduce_logical_xor(bval_1.z, 1u), sub_group_clustered_reduce_logical_xor(bval_1.w, 1u)); + added_1 = (float4)(sub_group_clustered_reduce_add(added_1.x, 2u), sub_group_clustered_reduce_add(added_1.y, 2u), sub_group_clustered_reduce_add(added_1.z, 2u), sub_group_clustered_reduce_add(added_1.w, 2u)); + multiplied_1 = (float4)(sub_group_clustered_reduce_mul(multiplied_1.x, 2u), sub_group_clustered_reduce_mul(multiplied_1.y, 2u), sub_group_clustered_reduce_mul(multiplied_1.z, 2u), sub_group_clustered_reduce_mul(multiplied_1.w, 2u)); + iadded_1 = (int4)(sub_group_clustered_reduce_add(iadded_1.x, 2u), 
sub_group_clustered_reduce_add(iadded_1.y, 2u), sub_group_clustered_reduce_add(iadded_1.z, 2u), sub_group_clustered_reduce_add(iadded_1.w, 2u)); + imultiplied_1 = (int4)(sub_group_clustered_reduce_mul(imultiplied_1.x, 2u), sub_group_clustered_reduce_mul(imultiplied_1.y, 2u), sub_group_clustered_reduce_mul(imultiplied_1.z, 2u), sub_group_clustered_reduce_mul(imultiplied_1.w, 2u)); + lo_1 = (float4)(sub_group_clustered_reduce_min(lo_1.x, 2u), sub_group_clustered_reduce_min(lo_1.y, 2u), sub_group_clustered_reduce_min(lo_1.z, 2u), sub_group_clustered_reduce_min(lo_1.w, 2u)); + hi_1 = (float4)(sub_group_clustered_reduce_max(hi_1.x, 2u), sub_group_clustered_reduce_max(hi_1.y, 2u), sub_group_clustered_reduce_max(hi_1.z, 2u), sub_group_clustered_reduce_max(hi_1.w, 2u)); + slo_1 = (int4)(sub_group_clustered_reduce_min(slo_1.x, 2u), sub_group_clustered_reduce_min(slo_1.y, 2u), sub_group_clustered_reduce_min(slo_1.z, 2u), sub_group_clustered_reduce_min(slo_1.w, 2u)); + shi_1 = (int4)(sub_group_clustered_reduce_max(shi_1.x, 2u), sub_group_clustered_reduce_max(shi_1.y, 2u), sub_group_clustered_reduce_max(shi_1.z, 2u), sub_group_clustered_reduce_max(shi_1.w, 2u)); + ulo_1 = (uint4)(sub_group_clustered_reduce_min(ulo_1.x, 2u), sub_group_clustered_reduce_min(ulo_1.y, 2u), sub_group_clustered_reduce_min(ulo_1.z, 2u), sub_group_clustered_reduce_min(ulo_1.w, 2u)); + uhi_1 = (uint4)(sub_group_clustered_reduce_max(uhi_1.x, 2u), sub_group_clustered_reduce_max(uhi_1.y, 2u), sub_group_clustered_reduce_max(uhi_1.z, 2u), sub_group_clustered_reduce_max(uhi_1.w, 2u)); + anded_1 = (uint4)(sub_group_clustered_reduce_and(anded_1.x, 2u), sub_group_clustered_reduce_and(anded_1.y, 2u), sub_group_clustered_reduce_and(anded_1.z, 2u), sub_group_clustered_reduce_and(anded_1.w, 2u)); + ored_1 = (uint4)(sub_group_clustered_reduce_or(ored_1.x, 2u), sub_group_clustered_reduce_or(ored_1.y, 2u), sub_group_clustered_reduce_or(ored_1.z, 2u), sub_group_clustered_reduce_or(ored_1.w, 2u)); + xored_1 = 
(uint4)(sub_group_clustered_reduce_xor(xored_1.x, 2u), sub_group_clustered_reduce_xor(xored_1.y, 2u), sub_group_clustered_reduce_xor(xored_1.z, 2u), sub_group_clustered_reduce_xor(xored_1.w, 2u)); + int4 _123 = anded_1 == (uint4)(2u); + anded_b_1 = (int4)(sub_group_clustered_reduce_logical_and(_123.x, 2u), sub_group_clustered_reduce_logical_and(_123.y, 2u), sub_group_clustered_reduce_logical_and(_123.z, 2u), sub_group_clustered_reduce_logical_and(_123.w, 2u)); + int4 _127 = ored_1 == (uint4)(3u); + ored_b_1 = (int4)(sub_group_clustered_reduce_logical_or(_127.x, 2u), sub_group_clustered_reduce_logical_or(_127.y, 2u), sub_group_clustered_reduce_logical_or(_127.z, 2u), sub_group_clustered_reduce_logical_or(_127.w, 2u)); + int4 _132 = xored_1 == (uint4)(4u); + xored_b_1 = (int4)(sub_group_clustered_reduce_logical_xor(_132.x, 2u), sub_group_clustered_reduce_logical_xor(_132.y, 2u), sub_group_clustered_reduce_logical_xor(_132.z, 2u), sub_group_clustered_reduce_logical_xor(_132.w, 2u)); + added_1 = (float4)(sub_group_clustered_reduce_add(added_1.x, 4u), sub_group_clustered_reduce_add(added_1.y, 4u), sub_group_clustered_reduce_add(added_1.z, 4u), sub_group_clustered_reduce_add(added_1.w, 4u)); + multiplied_1 = (float4)(sub_group_clustered_reduce_mul(multiplied_1.x, 4u), sub_group_clustered_reduce_mul(multiplied_1.y, 4u), sub_group_clustered_reduce_mul(multiplied_1.z, 4u), sub_group_clustered_reduce_mul(multiplied_1.w, 4u)); + iadded_1 = (int4)(sub_group_clustered_reduce_add(iadded_1.x, 4u), sub_group_clustered_reduce_add(iadded_1.y, 4u), sub_group_clustered_reduce_add(iadded_1.z, 4u), sub_group_clustered_reduce_add(iadded_1.w, 4u)); + imultiplied_1 = (int4)(sub_group_clustered_reduce_mul(imultiplied_1.x, 4u), sub_group_clustered_reduce_mul(imultiplied_1.y, 4u), sub_group_clustered_reduce_mul(imultiplied_1.z, 4u), sub_group_clustered_reduce_mul(imultiplied_1.w, 4u)); + lo_1 = (float4)(sub_group_clustered_reduce_min(lo_1.x, 4u), sub_group_clustered_reduce_min(lo_1.y, 4u), 
sub_group_clustered_reduce_min(lo_1.z, 4u), sub_group_clustered_reduce_min(lo_1.w, 4u)); + hi_1 = (float4)(sub_group_clustered_reduce_max(hi_1.x, 4u), sub_group_clustered_reduce_max(hi_1.y, 4u), sub_group_clustered_reduce_max(hi_1.z, 4u), sub_group_clustered_reduce_max(hi_1.w, 4u)); + slo_1 = (int4)(sub_group_clustered_reduce_min(slo_1.x, 4u), sub_group_clustered_reduce_min(slo_1.y, 4u), sub_group_clustered_reduce_min(slo_1.z, 4u), sub_group_clustered_reduce_min(slo_1.w, 4u)); + shi_1 = (int4)(sub_group_clustered_reduce_max(shi_1.x, 4u), sub_group_clustered_reduce_max(shi_1.y, 4u), sub_group_clustered_reduce_max(shi_1.z, 4u), sub_group_clustered_reduce_max(shi_1.w, 4u)); + ulo_1 = (uint4)(sub_group_clustered_reduce_min(ulo_1.x, 4u), sub_group_clustered_reduce_min(ulo_1.y, 4u), sub_group_clustered_reduce_min(ulo_1.z, 4u), sub_group_clustered_reduce_min(ulo_1.w, 4u)); + uhi_1 = (uint4)(sub_group_clustered_reduce_max(uhi_1.x, 4u), sub_group_clustered_reduce_max(uhi_1.y, 4u), sub_group_clustered_reduce_max(uhi_1.z, 4u), sub_group_clustered_reduce_max(uhi_1.w, 4u)); + anded_1 = (uint4)(sub_group_clustered_reduce_and(anded_1.x, 4u), sub_group_clustered_reduce_and(anded_1.y, 4u), sub_group_clustered_reduce_and(anded_1.z, 4u), sub_group_clustered_reduce_and(anded_1.w, 4u)); + ored_1 = (uint4)(sub_group_clustered_reduce_or(ored_1.x, 4u), sub_group_clustered_reduce_or(ored_1.y, 4u), sub_group_clustered_reduce_or(ored_1.z, 4u), sub_group_clustered_reduce_or(ored_1.w, 4u)); + xored_1 = (uint4)(sub_group_clustered_reduce_xor(xored_1.x, 4u), sub_group_clustered_reduce_xor(xored_1.y, 4u), sub_group_clustered_reduce_xor(xored_1.z, 4u), sub_group_clustered_reduce_xor(xored_1.w, 4u)); + int4 _161 = anded_1 == (uint4)(2u); + anded_b_1 = (int4)(sub_group_clustered_reduce_logical_and(_161.x, 4u), sub_group_clustered_reduce_logical_and(_161.y, 4u), sub_group_clustered_reduce_logical_and(_161.z, 4u), sub_group_clustered_reduce_logical_and(_161.w, 4u)); + int4 _164 = ored_1 == 
(uint4)(3u); + ored_b_1 = (int4)(sub_group_clustered_reduce_logical_or(_164.x, 4u), sub_group_clustered_reduce_logical_or(_164.y, 4u), sub_group_clustered_reduce_logical_or(_164.z, 4u), sub_group_clustered_reduce_logical_or(_164.w, 4u)); + int4 _167 = xored_1 == (uint4)(4u); + xored_b_1 = (int4)(sub_group_clustered_reduce_logical_xor(_167.x, 4u), sub_group_clustered_reduce_logical_xor(_167.y, 4u), sub_group_clustered_reduce_logical_xor(_167.z, 4u), sub_group_clustered_reduce_logical_xor(_167.w, 4u)); + added_1 = (float4)(sub_group_clustered_reduce_add(added_1.x, 16u), sub_group_clustered_reduce_add(added_1.y, 16u), sub_group_clustered_reduce_add(added_1.z, 16u), sub_group_clustered_reduce_add(added_1.w, 16u)); + multiplied_1 = (float4)(sub_group_clustered_reduce_mul(multiplied_1.x, 16u), sub_group_clustered_reduce_mul(multiplied_1.y, 16u), sub_group_clustered_reduce_mul(multiplied_1.z, 16u), sub_group_clustered_reduce_mul(multiplied_1.w, 16u)); + iadded_1 = (int4)(sub_group_clustered_reduce_add(iadded_1.x, 16u), sub_group_clustered_reduce_add(iadded_1.y, 16u), sub_group_clustered_reduce_add(iadded_1.z, 16u), sub_group_clustered_reduce_add(iadded_1.w, 16u)); + imultiplied_1 = (int4)(sub_group_clustered_reduce_mul(imultiplied_1.x, 16u), sub_group_clustered_reduce_mul(imultiplied_1.y, 16u), sub_group_clustered_reduce_mul(imultiplied_1.z, 16u), sub_group_clustered_reduce_mul(imultiplied_1.w, 16u)); + lo_1 = (float4)(sub_group_clustered_reduce_min(lo_1.x, 16u), sub_group_clustered_reduce_min(lo_1.y, 16u), sub_group_clustered_reduce_min(lo_1.z, 16u), sub_group_clustered_reduce_min(lo_1.w, 16u)); + hi_1 = (float4)(sub_group_clustered_reduce_max(hi_1.x, 16u), sub_group_clustered_reduce_max(hi_1.y, 16u), sub_group_clustered_reduce_max(hi_1.z, 16u), sub_group_clustered_reduce_max(hi_1.w, 16u)); + slo_1 = (int4)(sub_group_clustered_reduce_min(slo_1.x, 16u), sub_group_clustered_reduce_min(slo_1.y, 16u), sub_group_clustered_reduce_min(slo_1.z, 16u), 
sub_group_clustered_reduce_min(slo_1.w, 16u)); + shi_1 = (int4)(sub_group_clustered_reduce_max(shi_1.x, 16u), sub_group_clustered_reduce_max(shi_1.y, 16u), sub_group_clustered_reduce_max(shi_1.z, 16u), sub_group_clustered_reduce_max(shi_1.w, 16u)); + ulo_1 = (uint4)(sub_group_clustered_reduce_min(ulo_1.x, 16u), sub_group_clustered_reduce_min(ulo_1.y, 16u), sub_group_clustered_reduce_min(ulo_1.z, 16u), sub_group_clustered_reduce_min(ulo_1.w, 16u)); + uhi_1 = (uint4)(sub_group_clustered_reduce_max(uhi_1.x, 16u), sub_group_clustered_reduce_max(uhi_1.y, 16u), sub_group_clustered_reduce_max(uhi_1.z, 16u), sub_group_clustered_reduce_max(uhi_1.w, 16u)); + anded_1 = (uint4)(sub_group_clustered_reduce_and(anded_1.x, 16u), sub_group_clustered_reduce_and(anded_1.y, 16u), sub_group_clustered_reduce_and(anded_1.z, 16u), sub_group_clustered_reduce_and(anded_1.w, 16u)); + ored_1 = (uint4)(sub_group_clustered_reduce_or(ored_1.x, 16u), sub_group_clustered_reduce_or(ored_1.y, 16u), sub_group_clustered_reduce_or(ored_1.z, 16u), sub_group_clustered_reduce_or(ored_1.w, 16u)); + xored_1 = (uint4)(sub_group_clustered_reduce_xor(xored_1.x, 16u), sub_group_clustered_reduce_xor(xored_1.y, 16u), sub_group_clustered_reduce_xor(xored_1.z, 16u), sub_group_clustered_reduce_xor(xored_1.w, 16u)); + int4 _197 = anded_1 == (uint4)(2u); + anded_b_1 = (int4)(sub_group_clustered_reduce_logical_and(_197.x, 16u), sub_group_clustered_reduce_logical_and(_197.y, 16u), sub_group_clustered_reduce_logical_and(_197.z, 16u), sub_group_clustered_reduce_logical_and(_197.w, 16u)); + int4 _200 = ored_1 == (uint4)(3u); + ored_b_1 = (int4)(sub_group_clustered_reduce_logical_or(_200.x, 16u), sub_group_clustered_reduce_logical_or(_200.y, 16u), sub_group_clustered_reduce_logical_or(_200.z, 16u), sub_group_clustered_reduce_logical_or(_200.w, 16u)); + int4 _203 = xored_1 == (uint4)(4u); + xored_b_1 = (int4)(sub_group_clustered_reduce_logical_xor(_203.x, 16u), sub_group_clustered_reduce_logical_xor(_203.y, 16u), 
sub_group_clustered_reduce_logical_xor(_203.z, 16u), sub_group_clustered_reduce_logical_xor(_203.w, 16u)); + _14->fdat = (((((((added_1.x + multiplied_1.x) + lo_1.x) + hi_1.x) + convert_float(((iadded_1.x + imultiplied_1.x) + slo_1.x) + shi_1.x)) + convert_float((((ulo_1.x + uhi_1.x) + anded_1.x) + ored_1.x) + xored_1.x)) + (float)(anded_b_1.x)) + (float)(ored_b_1.x)) + (float)(xored_b_1.x); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..d97431603 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable + +struct SSBO +{ + float FragColor; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global float* _26) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint rotated_1 = sub_group_rotate(20u, 4u); + bool rotated_bool_1 = sub_group_rotate(false, 4u); + uint rotated_clustered_1 = sub_group_clustered_rotate(20u, 4u, 8u); + bool rotated_clustered_bool_1 = sub_group_clustered_rotate(false, 4u, 8u); + _26[0] = ((convert_float(rotated_1) + (float)(rotated_bool_1)) + convert_float(rotated_clustered_1)) + (float)(rotated_clustered_bool_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..7c076e911 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp @@ -0,0 +1,24 @@ 
+// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable + +struct SSBO +{ + float FragColor; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global float* _26) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint shuffled_up_1 = sub_group_shuffle_up(20u, 4u); + bool shuffled_up_bool_1 = sub_group_shuffle_up(true, 4u); + uint shuffled_down_1 = sub_group_shuffle_down(20u, 4u); + bool shuffled_down_bool_1 = sub_group_shuffle_down(false, 4u); + _26[0] = ((convert_float(shuffled_up_1) + (float)(shuffled_up_bool_1)) + convert_float(shuffled_down_1)) + (float)(shuffled_down_bool_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..5c032dda2 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp @@ -0,0 +1,24 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable + +struct SSBO +{ + float FragColor; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global float* _28) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint shuffled_1 = sub_group_shuffle(10u, 8u); + bool shuffled_bool_1 = sub_group_shuffle(true, 9u); + uint shuffled_xor_1 = sub_group_shuffle_xor(30u, 8u); + bool shuffled_xor_bool_1 = sub_group_shuffle_xor(false, 9u); + _28[0] = ((convert_float(shuffled_1) + 
(float)(shuffled_bool_1)) + convert_float(shuffled_xor_1)) + (float)(shuffled_xor_bool_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..63276058c --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp @@ -0,0 +1,28 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable +#pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable + +struct SSBO +{ + float FragColor; + int idat; +}; + +typedef struct SSBO SSBO; + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _29) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + bool elected_1 = sub_group_elect(); + bool has_all_1 = sub_group_all(get_sub_group_local_id() < 10u); + bool has_any_1 = sub_group_any(get_sub_group_local_id() == 0u); + bool eq_int_1 = sub_group_non_uniform_all_equal(_29->idat); + bool eq_bool_1 = sub_group_non_uniform_all_equal(true); + bool eq_vec3_1 = sub_group_non_uniform_all_equal((float3)(0.0f, 1.0f, 2.0f).x) && sub_group_non_uniform_all_equal((float3)(0.0f, 1.0f, 2.0f).y) && sub_group_non_uniform_all_equal((float3)(0.0f, 1.0f, 2.0f).z); + bool eq_bvec4_1 = sub_group_non_uniform_all_equal((int4)(true, true, false, true).x) && sub_group_non_uniform_all_equal((int4)(true, true, false, true).y) && sub_group_non_uniform_all_equal((int4)(true, true, false, true).z) && sub_group_non_uniform_all_equal((int4)(true, true, false, true).w); + _29->FragColor = ((((((float)(elected_1) + (float)(has_all_1)) + (float)(has_any_1)) + (float)(eq_int_1)) + (float)(eq_bool_1)) + (float)(eq_vec3_1)) + (float)(eq_bvec4_1); +} + diff --git 
a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp b/reference/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..23bf10ec5 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp @@ -0,0 +1,47 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_arithmetic : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float fdat; + int idat; + uint udat; +}; + +void main() +{ + float fadd = subgroupAdd(fdat); + float fmul = subgroupMul(fdat); + int iadd = subgroupAdd(idat); + int imul = subgroupMul(idat); + float fmin_v = subgroupMin(fdat); + float fmax_v = subgroupMax(fdat); + int smin_v = subgroupMin(idat); + int smax_v = subgroupMax(idat); + uint umin_v = subgroupMin(udat); + uint umax_v = subgroupMax(udat); + uint anded = subgroupAnd(udat); + uint ored = subgroupOr(udat); + uint xored = 
subgroupXor(udat); + bvec4 bval = equal(uvec4(udat), uvec4(42u)); + bvec4 anded_b = subgroupAnd(bval); + bvec4 ored_b = subgroupOr(bval); + bvec4 xored_b = subgroupXor(bval); + + float finc_add = subgroupInclusiveAdd(fdat); + float finc_mul = subgroupInclusiveMul(fdat); + int iinc_add = subgroupInclusiveAdd(idat); + int iinc_mul = subgroupInclusiveMul(idat); + + float fexc_add = subgroupExclusiveAdd(fdat); + float fexc_mul = subgroupExclusiveMul(fdat); + int iexc_add = subgroupExclusiveAdd(idat); + int iexc_mul = subgroupExclusiveMul(idat); + + fdat = fadd + fmul + fmin_v + fmax_v + finc_add + finc_mul + fexc_add + fexc_mul + + float(iadd + imul + smin_v + smax_v + iinc_add + iinc_mul + iexc_add + iexc_mul) + + float(umin_v + umax_v + anded + ored + xored) + + float(anded_b.x) + float(ored_b.x) + float(xored_b.x); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..f65334709 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp @@ -0,0 +1,36 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + FragColor = float(gl_SubgroupEqMask); + FragColor = float(gl_SubgroupGeMask); + FragColor = float(gl_SubgroupGtMask); + FragColor = float(gl_SubgroupLeMask); + FragColor = float(gl_SubgroupLtMask); + + vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); + bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); + vec3 first = subgroupBroadcastFirst(vec3(20.0)); + bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); + uvec4 ballot_value = subgroupBallot(true); + bool inverse_ballot_value = subgroupInverseBallot(ballot_value); + bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); + uint 
bit_count = subgroupBallotBitCount(ballot_value); + uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); + uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); + uint lsb = subgroupBallotFindLSB(ballot_value); + uint msb = subgroupBallotFindMSB(ballot_value); + + FragColor = broadcasted.x + float(broadcasted_bool.x) + first.x + float(first_bool.x) + + float(inverse_ballot_value) + float(bit_extracted) + + float(bit_count) + float(inclusive_bit_count) + float(exclusive_bit_count) + + float(lsb) + float(msb); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..da981bccf --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp @@ -0,0 +1,72 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_arithmetic : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +void main() +{ + // Builtins + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + + // Barriers + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + + // Vote (uniform) — OpGroupNonUniformAll/Any map to sub_group_all/any in cl_khr_subgroups + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + + // Broadcast (uniform) — OpGroupNonUniformBroadcast maps to sub_group_broadcast in cl_khr_subgroups + uint broadcasted = subgroupBroadcast(42u, 0u); + + // Reduce (uniform) — OpGroupNonUniform{I,F}Add/Min/Max with Reduce + float fadd = subgroupAdd(FragColor); 
+ int iadd = subgroupAdd(idat); + float fmin = subgroupMin(FragColor); + float fmax = subgroupMax(FragColor); + int smin = subgroupMin(idat); + int smax = subgroupMax(idat); + uint umin = subgroupMin(udat); + uint umax = subgroupMax(udat); + + // Inclusive scan + float finc_add = subgroupInclusiveAdd(FragColor); + float finc_min = subgroupInclusiveMin(FragColor); + float finc_max = subgroupInclusiveMax(FragColor); + int iinc_add = subgroupInclusiveAdd(idat); + int iinc_min = subgroupInclusiveMin(idat); + int iinc_max = subgroupInclusiveMax(idat); + uint uinc_min = subgroupInclusiveMin(udat); + uint uinc_max = subgroupInclusiveMax(udat); + + // Exclusive scan + float fexc_add = subgroupExclusiveAdd(FragColor); + float fexc_min = subgroupExclusiveMin(FragColor); + float fexc_max = subgroupExclusiveMax(FragColor); + int iexc_add = subgroupExclusiveAdd(idat); + int iexc_min = subgroupExclusiveMin(idat); + int iexc_max = subgroupExclusiveMax(idat); + uint uexc_min = subgroupExclusiveMin(udat); + uint uexc_max = subgroupExclusiveMax(udat); + + // Write results to prevent dead-code elimination + FragColor = fadd + fmin + fmax + finc_add + finc_min + finc_max + fexc_add + fexc_min + fexc_max + + float(iadd + smin + smax + iinc_add + iinc_min + iinc_max + iexc_add + iexc_min + iexc_max) + + float(umin + umax + uinc_min + uinc_max + uexc_min + uexc_max + broadcasted) + + float(has_all) + float(has_any); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..c52b5ab00 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp @@ -0,0 +1,97 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_clustered : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float fdat; + int idat; + uint udat; +}; + +void main() 
+{ + vec4 fv = vec4(fdat); + ivec4 iv = ivec4(idat); + uvec4 uv = uvec4(udat); + + // Cluster size 1 + vec4 added = subgroupClusteredAdd(fv, 1u); + vec4 multiplied = subgroupClusteredMul(fv, 1u); + ivec4 iadded = subgroupClusteredAdd(iv, 1u); + ivec4 imultiplied = subgroupClusteredMul(iv, 1u); + vec4 lo = subgroupClusteredMin(fv, 1u); + vec4 hi = subgroupClusteredMax(fv, 1u); + ivec4 slo = subgroupClusteredMin(iv, 1u); + ivec4 shi = subgroupClusteredMax(iv, 1u); + uvec4 ulo = subgroupClusteredMin(uv, 1u); + uvec4 uhi = subgroupClusteredMax(uv, 1u); + uvec4 anded = subgroupClusteredAnd(uv, 1u); + uvec4 ored = subgroupClusteredOr(uv, 1u); + uvec4 xored = subgroupClusteredXor(uv, 1u); + bvec4 bval = equal(uv, uvec4(42u)); + bvec4 anded_b = subgroupClusteredAnd(bval, 1u); + bvec4 ored_b = subgroupClusteredOr(bval, 1u); + bvec4 xored_b = subgroupClusteredXor(bval, 1u); + + // Cluster size 2 + added = subgroupClusteredAdd(added, 2u); + multiplied = subgroupClusteredMul(multiplied, 2u); + iadded = subgroupClusteredAdd(iadded, 2u); + imultiplied = subgroupClusteredMul(imultiplied, 2u); + lo = subgroupClusteredMin(lo, 2u); + hi = subgroupClusteredMax(hi, 2u); + slo = subgroupClusteredMin(slo, 2u); + shi = subgroupClusteredMax(shi, 2u); + ulo = subgroupClusteredMin(ulo, 2u); + uhi = subgroupClusteredMax(uhi, 2u); + anded = subgroupClusteredAnd(anded, 2u); + ored = subgroupClusteredOr(ored, 2u); + xored = subgroupClusteredXor(xored, 2u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); + + // Cluster size 4 + added = subgroupClusteredAdd(added, 4u); + multiplied = subgroupClusteredMul(multiplied, 4u); + iadded = subgroupClusteredAdd(iadded, 4u); + imultiplied = subgroupClusteredMul(imultiplied, 4u); + lo = subgroupClusteredMin(lo, 4u); + hi = subgroupClusteredMax(hi, 4u); + slo = subgroupClusteredMin(slo, 4u); + shi = 
subgroupClusteredMax(shi, 4u); + ulo = subgroupClusteredMin(ulo, 4u); + uhi = subgroupClusteredMax(uhi, 4u); + anded = subgroupClusteredAnd(anded, 4u); + ored = subgroupClusteredOr(ored, 4u); + xored = subgroupClusteredXor(xored, 4u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); + + // Cluster size 16 + added = subgroupClusteredAdd(added, 16u); + multiplied = subgroupClusteredMul(multiplied, 16u); + iadded = subgroupClusteredAdd(iadded, 16u); + imultiplied = subgroupClusteredMul(imultiplied, 16u); + lo = subgroupClusteredMin(lo, 16u); + hi = subgroupClusteredMax(hi, 16u); + slo = subgroupClusteredMin(slo, 16u); + shi = subgroupClusteredMax(shi, 16u); + ulo = subgroupClusteredMin(ulo, 16u); + uhi = subgroupClusteredMax(uhi, 16u); + anded = subgroupClusteredAnd(anded, 16u); + ored = subgroupClusteredOr(ored, 16u); + xored = subgroupClusteredXor(xored, 16u); + anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); + ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); + xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); + + // Write results + fdat = added.x + multiplied.x + lo.x + hi.x + + float(iadded.x + imultiplied.x + slo.x + shi.x) + + float(ulo.x + uhi.x + anded.x + ored.x + xored.x) + + float(anded_b.x) + float(ored_b.x) + float(xored_b.x); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..0df5d8330 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp @@ -0,0 +1,20 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_rotate : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + uint rotated = 
subgroupRotate(20u, 4u); + bool rotated_bool = subgroupRotate(false, 4u); + uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); + bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); + + FragColor = float(rotated) + float(rotated_bool) + + float(rotated_clustered) + float(rotated_clustered_bool); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..b026695ce --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp @@ -0,0 +1,20 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + uint shuffled_up = subgroupShuffleUp(20u, 4u); + bool shuffled_up_bool = subgroupShuffleUp(true, 4u); + uint shuffled_down = subgroupShuffleDown(20u, 4u); + bool shuffled_down_bool = subgroupShuffleDown(false, 4u); + + FragColor = float(shuffled_up) + float(shuffled_up_bool) + + float(shuffled_down) + float(shuffled_down_bool); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..8e2a433ac --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp @@ -0,0 +1,19 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + uint shuffled = subgroupShuffle(10u, 8u); + bool shuffled_bool = subgroupShuffle(true, 9u); + uint shuffled_xor = subgroupShuffleXor(30u, 8u); + bool shuffled_xor_bool = 
subgroupShuffleXor(false, 9u); + + FragColor = float(shuffled) + float(shuffled_bool) + float(shuffled_xor) + float(shuffled_xor_bool); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp new file mode 100644 index 000000000..b0995a600 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp @@ -0,0 +1,24 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; +}; + +void main() +{ + bool elected = subgroupElect(); + bool has_all = subgroupAll(gl_SubgroupInvocationID < 10u); + bool has_any = subgroupAny(gl_SubgroupInvocationID == 0u); + bool eq_int = subgroupAllEqual(idat); + bool eq_bool = subgroupAllEqual(true); + bool eq_vec3 = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); + bool eq_bvec4 = subgroupAllEqual(bvec4(true, true, false, true)); + + FragColor = float(elected) + float(has_all) + float(has_any) + + float(eq_int) + float(eq_bool) + float(eq_vec3) + float(eq_bvec4); +} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp deleted file mode 100644 index 8a0be2269..000000000 --- a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.opencl12.emulate-subgroup.invalid.comp +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -layout(local_size_x = 1) in; - -layout(std430, binding = 0) buffer SSBO -{ - float FragColor; -}; - -// Reduced test for emulated functionality. 
- -void main() -{ - // basic - FragColor = float(gl_NumSubgroups); - FragColor = float(gl_SubgroupID); - FragColor = float(gl_SubgroupSize); - FragColor = float(gl_SubgroupInvocationID); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); -} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp deleted file mode 100644 index c8172fd95..000000000 --- a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.fixed-subgroup.invalid.comp +++ /dev/null @@ -1,211 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_KHR_shader_subgroup_shuffle_relative : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_clustered : require -#extension GL_KHR_shader_subgroup_quad : require -#extension GL_KHR_shader_subgroup_rotate : require -layout(local_size_x = 1) in; - -layout(std430, binding = 0) buffer SSBO -{ - float FragColor; -}; - -void doClusteredRotate() -{ - uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); - bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); -} - -void main() -{ - // basic - FragColor = float(gl_NumSubgroups); - FragColor = float(gl_SubgroupID); - FragColor = float(gl_SubgroupSize); - FragColor = float(gl_SubgroupInvocationID); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); - - // ballot - FragColor = float(gl_SubgroupEqMask); - FragColor = float(gl_SubgroupGeMask); - FragColor = float(gl_SubgroupGtMask); 
- FragColor = float(gl_SubgroupLeMask); - FragColor = float(gl_SubgroupLtMask); - vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); - bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); - vec3 first = subgroupBroadcastFirst(vec3(20.0)); - bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); - uvec4 ballot_value = subgroupBallot(true); - bool inverse_ballot_value = subgroupInverseBallot(ballot_value); - bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); - uint bit_count = subgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); - uint lsb = subgroupBallotFindLSB(ballot_value); - uint msb = subgroupBallotFindMSB(ballot_value); - - // shuffle - uint shuffled = subgroupShuffle(10u, 8u); - bool shuffled_bool = subgroupShuffle(true, 9u); - uint shuffled_xor = subgroupShuffleXor(30u, 8u); - bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); - - // shuffle relative - uint shuffled_up = subgroupShuffleUp(20u, 4u); - bool shuffled_up_bool = subgroupShuffleUp(true, 4u); - uint shuffled_down = subgroupShuffleDown(20u, 4u); - bool shuffled_down_bool = subgroupShuffleDown(false, 4u); - - // rotate - uint rotated = subgroupRotate(20u, 4u); - bool rotated_bool = subgroupRotate(false, 4u); - doClusteredRotate(); - - // vote - bool has_all = subgroupAll(true); - bool has_any = subgroupAny(true); - bool has_equal = subgroupAllEqual(0); - has_equal = subgroupAllEqual(true); - has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); - has_equal = subgroupAllEqual(bvec4(true, true, false, true)); - - // arithmetic - vec4 added = subgroupAdd(vec4(20.0)); - ivec4 iadded = subgroupAdd(ivec4(20)); - vec4 multiplied = subgroupMul(vec4(20.0)); - ivec4 imultiplied = subgroupMul(ivec4(20)); - vec4 lo = subgroupMin(vec4(20.0)); - vec4 hi = subgroupMax(vec4(20.0)); - ivec4 slo = subgroupMin(ivec4(20)); - ivec4 shi = 
subgroupMax(ivec4(20)); - uvec4 ulo = subgroupMin(uvec4(20)); - uvec4 uhi = subgroupMax(uvec4(20)); - uvec4 anded = subgroupAnd(ballot_value); - uvec4 ored = subgroupOr(ballot_value); - uvec4 xored = subgroupXor(ballot_value); - bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); - bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); - bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); - - added = subgroupInclusiveAdd(added); - iadded = subgroupInclusiveAdd(iadded); - multiplied = subgroupInclusiveMul(multiplied); - imultiplied = subgroupInclusiveMul(imultiplied); - //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal - //hi = subgroupInclusiveMax(hi); - //slo = subgroupInclusiveMin(slo); - //shi = subgroupInclusiveMax(shi); - //ulo = subgroupInclusiveMin(ulo); - //uhi = subgroupInclusiveMax(uhi); - //anded = subgroupInclusiveAnd(anded); - //ored = subgroupInclusiveOr(ored); - //xored = subgroupInclusiveXor(ored); - //added = subgroupExclusiveAdd(lo); - - added = subgroupExclusiveAdd(multiplied); - multiplied = subgroupExclusiveMul(multiplied); - iadded = subgroupExclusiveAdd(imultiplied); - imultiplied = subgroupExclusiveMul(imultiplied); - //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal - //hi = subgroupExclusiveMax(hi); - //ulo = subgroupExclusiveMin(ulo); - //uhi = subgroupExclusiveMax(uhi); - //slo = subgroupExclusiveMin(slo); - //shi = subgroupExclusiveMax(shi); - //anded = subgroupExclusiveAnd(anded); - //ored = subgroupExclusiveOr(ored); - //xored = subgroupExclusiveXor(ored); - - // clustered - added = subgroupClusteredAdd(added, 1u); - multiplied = subgroupClusteredMul(multiplied, 1u); - iadded = subgroupClusteredAdd(iadded, 1u); - imultiplied = subgroupClusteredMul(imultiplied, 1u); - lo = subgroupClusteredMin(lo, 1u); - hi = subgroupClusteredMax(hi, 1u); - ulo = subgroupClusteredMin(ulo, 1u); - uhi = subgroupClusteredMax(uhi, 1u); - slo = subgroupClusteredMin(slo, 1u); - shi = subgroupClusteredMax(shi, 
1u); - anded = subgroupClusteredAnd(anded, 1u); - ored = subgroupClusteredOr(ored, 1u); - xored = subgroupClusteredXor(xored, 1u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); - - added = subgroupClusteredAdd(added, 2u); - multiplied = subgroupClusteredMul(multiplied, 2u); - iadded = subgroupClusteredAdd(iadded, 2u); - imultiplied = subgroupClusteredMul(imultiplied, 2u); - lo = subgroupClusteredMin(lo, 2u); - hi = subgroupClusteredMax(hi, 2u); - ulo = subgroupClusteredMin(ulo, 2u); - uhi = subgroupClusteredMax(uhi, 2u); - slo = subgroupClusteredMin(slo, 2u); - shi = subgroupClusteredMax(shi, 2u); - anded = subgroupClusteredAnd(anded, 2u); - ored = subgroupClusteredOr(ored, 2u); - xored = subgroupClusteredXor(xored, 2u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); - - added = subgroupClusteredAdd(added, 4u); - multiplied = subgroupClusteredMul(multiplied, 4u); - iadded = subgroupClusteredAdd(iadded, 4u); - imultiplied = subgroupClusteredMul(imultiplied, 4u); - lo = subgroupClusteredMin(lo, 4u); - hi = subgroupClusteredMax(hi, 4u); - ulo = subgroupClusteredMin(ulo, 4u); - uhi = subgroupClusteredMax(uhi, 4u); - slo = subgroupClusteredMin(slo, 4u); - shi = subgroupClusteredMax(shi, 4u); - anded = subgroupClusteredAnd(anded, 4u); - ored = subgroupClusteredOr(ored, 4u); - xored = subgroupClusteredXor(xored, 4u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); - - added = subgroupClusteredAdd(added, 16u); - multiplied = subgroupClusteredMul(multiplied, 16u); - iadded = subgroupClusteredAdd(iadded, 16u); - imultiplied = subgroupClusteredMul(imultiplied, 
16u); - lo = subgroupClusteredMin(lo, 16u); - hi = subgroupClusteredMax(hi, 16u); - ulo = subgroupClusteredMin(ulo, 16u); - uhi = subgroupClusteredMax(uhi, 16u); - slo = subgroupClusteredMin(slo, 16u); - shi = subgroupClusteredMax(shi, 16u); - anded = subgroupClusteredAnd(anded, 16u); - ored = subgroupClusteredOr(ored, 16u); - xored = subgroupClusteredXor(xored, 16u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); - - // quad - vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); - bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); - vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); - bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); - vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); - bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); - vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); - bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); -} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp deleted file mode 100644 index c8172fd95..000000000 --- a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.invalid.comp +++ /dev/null @@ -1,211 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_KHR_shader_subgroup_shuffle_relative : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_clustered : require -#extension GL_KHR_shader_subgroup_quad : require -#extension GL_KHR_shader_subgroup_rotate : require -layout(local_size_x = 1) in; - -layout(std430, binding = 0) buffer SSBO -{ - float 
FragColor; -}; - -void doClusteredRotate() -{ - uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); - bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); -} - -void main() -{ - // basic - FragColor = float(gl_NumSubgroups); - FragColor = float(gl_SubgroupID); - FragColor = float(gl_SubgroupSize); - FragColor = float(gl_SubgroupInvocationID); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); - - // ballot - FragColor = float(gl_SubgroupEqMask); - FragColor = float(gl_SubgroupGeMask); - FragColor = float(gl_SubgroupGtMask); - FragColor = float(gl_SubgroupLeMask); - FragColor = float(gl_SubgroupLtMask); - vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); - bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); - vec3 first = subgroupBroadcastFirst(vec3(20.0)); - bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); - uvec4 ballot_value = subgroupBallot(true); - bool inverse_ballot_value = subgroupInverseBallot(ballot_value); - bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); - uint bit_count = subgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); - uint lsb = subgroupBallotFindLSB(ballot_value); - uint msb = subgroupBallotFindMSB(ballot_value); - - // shuffle - uint shuffled = subgroupShuffle(10u, 8u); - bool shuffled_bool = subgroupShuffle(true, 9u); - uint shuffled_xor = subgroupShuffleXor(30u, 8u); - bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); - - // shuffle relative - uint shuffled_up = subgroupShuffleUp(20u, 4u); - bool shuffled_up_bool = subgroupShuffleUp(true, 4u); - uint shuffled_down = subgroupShuffleDown(20u, 4u); - bool shuffled_down_bool = subgroupShuffleDown(false, 4u); - - // rotate - uint rotated = subgroupRotate(20u, 4u); - 
bool rotated_bool = subgroupRotate(false, 4u); - doClusteredRotate(); - - // vote - bool has_all = subgroupAll(true); - bool has_any = subgroupAny(true); - bool has_equal = subgroupAllEqual(0); - has_equal = subgroupAllEqual(true); - has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); - has_equal = subgroupAllEqual(bvec4(true, true, false, true)); - - // arithmetic - vec4 added = subgroupAdd(vec4(20.0)); - ivec4 iadded = subgroupAdd(ivec4(20)); - vec4 multiplied = subgroupMul(vec4(20.0)); - ivec4 imultiplied = subgroupMul(ivec4(20)); - vec4 lo = subgroupMin(vec4(20.0)); - vec4 hi = subgroupMax(vec4(20.0)); - ivec4 slo = subgroupMin(ivec4(20)); - ivec4 shi = subgroupMax(ivec4(20)); - uvec4 ulo = subgroupMin(uvec4(20)); - uvec4 uhi = subgroupMax(uvec4(20)); - uvec4 anded = subgroupAnd(ballot_value); - uvec4 ored = subgroupOr(ballot_value); - uvec4 xored = subgroupXor(ballot_value); - bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); - bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); - bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); - - added = subgroupInclusiveAdd(added); - iadded = subgroupInclusiveAdd(iadded); - multiplied = subgroupInclusiveMul(multiplied); - imultiplied = subgroupInclusiveMul(imultiplied); - //lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal - //hi = subgroupInclusiveMax(hi); - //slo = subgroupInclusiveMin(slo); - //shi = subgroupInclusiveMax(shi); - //ulo = subgroupInclusiveMin(ulo); - //uhi = subgroupInclusiveMax(uhi); - //anded = subgroupInclusiveAnd(anded); - //ored = subgroupInclusiveOr(ored); - //xored = subgroupInclusiveXor(ored); - //added = subgroupExclusiveAdd(lo); - - added = subgroupExclusiveAdd(multiplied); - multiplied = subgroupExclusiveMul(multiplied); - iadded = subgroupExclusiveAdd(imultiplied); - imultiplied = subgroupExclusiveMul(imultiplied); - //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal - //hi = subgroupExclusiveMax(hi); - //ulo = subgroupExclusiveMin(ulo); - 
//uhi = subgroupExclusiveMax(uhi); - //slo = subgroupExclusiveMin(slo); - //shi = subgroupExclusiveMax(shi); - //anded = subgroupExclusiveAnd(anded); - //ored = subgroupExclusiveOr(ored); - //xored = subgroupExclusiveXor(ored); - - // clustered - added = subgroupClusteredAdd(added, 1u); - multiplied = subgroupClusteredMul(multiplied, 1u); - iadded = subgroupClusteredAdd(iadded, 1u); - imultiplied = subgroupClusteredMul(imultiplied, 1u); - lo = subgroupClusteredMin(lo, 1u); - hi = subgroupClusteredMax(hi, 1u); - ulo = subgroupClusteredMin(ulo, 1u); - uhi = subgroupClusteredMax(uhi, 1u); - slo = subgroupClusteredMin(slo, 1u); - shi = subgroupClusteredMax(shi, 1u); - anded = subgroupClusteredAnd(anded, 1u); - ored = subgroupClusteredOr(ored, 1u); - xored = subgroupClusteredXor(xored, 1u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); - - added = subgroupClusteredAdd(added, 2u); - multiplied = subgroupClusteredMul(multiplied, 2u); - iadded = subgroupClusteredAdd(iadded, 2u); - imultiplied = subgroupClusteredMul(imultiplied, 2u); - lo = subgroupClusteredMin(lo, 2u); - hi = subgroupClusteredMax(hi, 2u); - ulo = subgroupClusteredMin(ulo, 2u); - uhi = subgroupClusteredMax(uhi, 2u); - slo = subgroupClusteredMin(slo, 2u); - shi = subgroupClusteredMax(shi, 2u); - anded = subgroupClusteredAnd(anded, 2u); - ored = subgroupClusteredOr(ored, 2u); - xored = subgroupClusteredXor(xored, 2u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); - - added = subgroupClusteredAdd(added, 4u); - multiplied = subgroupClusteredMul(multiplied, 4u); - iadded = subgroupClusteredAdd(iadded, 4u); - imultiplied = subgroupClusteredMul(imultiplied, 4u); - lo = subgroupClusteredMin(lo, 4u); - hi = subgroupClusteredMax(hi, 4u); 
- ulo = subgroupClusteredMin(ulo, 4u); - uhi = subgroupClusteredMax(uhi, 4u); - slo = subgroupClusteredMin(slo, 4u); - shi = subgroupClusteredMax(shi, 4u); - anded = subgroupClusteredAnd(anded, 4u); - ored = subgroupClusteredOr(ored, 4u); - xored = subgroupClusteredXor(xored, 4u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); - - added = subgroupClusteredAdd(added, 16u); - multiplied = subgroupClusteredMul(multiplied, 16u); - iadded = subgroupClusteredAdd(iadded, 16u); - imultiplied = subgroupClusteredMul(imultiplied, 16u); - lo = subgroupClusteredMin(lo, 16u); - hi = subgroupClusteredMax(hi, 16u); - ulo = subgroupClusteredMin(ulo, 16u); - uhi = subgroupClusteredMax(uhi, 16u); - slo = subgroupClusteredMin(slo, 16u); - shi = subgroupClusteredMax(shi, 16u); - anded = subgroupClusteredAnd(anded, 16u); - ored = subgroupClusteredOr(ored, 16u); - xored = subgroupClusteredXor(xored, 16u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); - - // quad - vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); - bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); - vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); - bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); - vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); - bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); - vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); - bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); -} diff --git a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp b/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp deleted file mode 100644 index c8172fd95..000000000 --- 
a/shaders-opencl-no-opt/comp/subgroups.nocompat.vk.subgroup.swizzle.invalid.comp +++ /dev/null @@ -1,211 +0,0 @@ -#version 450 -#extension GL_KHR_shader_subgroup_basic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_shuffle : require -#extension GL_KHR_shader_subgroup_shuffle_relative : require -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_clustered : require -#extension GL_KHR_shader_subgroup_quad : require -#extension GL_KHR_shader_subgroup_rotate : require -layout(local_size_x = 1) in; - -layout(std430, binding = 0) buffer SSBO -{ - float FragColor; -}; - -void doClusteredRotate() -{ - uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); - bool rotated_clustered_bool = subgroupClusteredRotate(false, 4u, 8u); -} - -void main() -{ - // basic - FragColor = float(gl_NumSubgroups); - FragColor = float(gl_SubgroupID); - FragColor = float(gl_SubgroupSize); - FragColor = float(gl_SubgroupInvocationID); - subgroupBarrier(); - subgroupMemoryBarrier(); - subgroupMemoryBarrierBuffer(); - subgroupMemoryBarrierShared(); - subgroupMemoryBarrierImage(); - bool elected = subgroupElect(); - - // ballot - FragColor = float(gl_SubgroupEqMask); - FragColor = float(gl_SubgroupGeMask); - FragColor = float(gl_SubgroupGtMask); - FragColor = float(gl_SubgroupLeMask); - FragColor = float(gl_SubgroupLtMask); - vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u); - bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u); - vec3 first = subgroupBroadcastFirst(vec3(20.0)); - bvec4 first_bool = subgroupBroadcastFirst(bvec4(false)); - uvec4 ballot_value = subgroupBallot(true); - bool inverse_ballot_value = subgroupInverseBallot(ballot_value); - bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u); - uint bit_count = subgroupBallotBitCount(ballot_value); - uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value); - 
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value); - uint lsb = subgroupBallotFindLSB(ballot_value); - uint msb = subgroupBallotFindMSB(ballot_value); - - // shuffle - uint shuffled = subgroupShuffle(10u, 8u); - bool shuffled_bool = subgroupShuffle(true, 9u); - uint shuffled_xor = subgroupShuffleXor(30u, 8u); - bool shuffled_xor_bool = subgroupShuffleXor(false, 9u); - - // shuffle relative - uint shuffled_up = subgroupShuffleUp(20u, 4u); - bool shuffled_up_bool = subgroupShuffleUp(true, 4u); - uint shuffled_down = subgroupShuffleDown(20u, 4u); - bool shuffled_down_bool = subgroupShuffleDown(false, 4u); - - // rotate - uint rotated = subgroupRotate(20u, 4u); - bool rotated_bool = subgroupRotate(false, 4u); - doClusteredRotate(); - - // vote - bool has_all = subgroupAll(true); - bool has_any = subgroupAny(true); - bool has_equal = subgroupAllEqual(0); - has_equal = subgroupAllEqual(true); - has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0)); - has_equal = subgroupAllEqual(bvec4(true, true, false, true)); - - // arithmetic - vec4 added = subgroupAdd(vec4(20.0)); - ivec4 iadded = subgroupAdd(ivec4(20)); - vec4 multiplied = subgroupMul(vec4(20.0)); - ivec4 imultiplied = subgroupMul(ivec4(20)); - vec4 lo = subgroupMin(vec4(20.0)); - vec4 hi = subgroupMax(vec4(20.0)); - ivec4 slo = subgroupMin(ivec4(20)); - ivec4 shi = subgroupMax(ivec4(20)); - uvec4 ulo = subgroupMin(uvec4(20)); - uvec4 uhi = subgroupMax(uvec4(20)); - uvec4 anded = subgroupAnd(ballot_value); - uvec4 ored = subgroupOr(ballot_value); - uvec4 xored = subgroupXor(ballot_value); - bvec4 anded_b = subgroupAnd(equal(ballot_value, uvec4(42))); - bvec4 ored_b = subgroupOr(equal(ballot_value, uvec4(42))); - bvec4 xored_b = subgroupXor(equal(ballot_value, uvec4(42))); - - added = subgroupInclusiveAdd(added); - iadded = subgroupInclusiveAdd(iadded); - multiplied = subgroupInclusiveMul(multiplied); - imultiplied = subgroupInclusiveMul(imultiplied); - //lo = subgroupInclusiveMin(lo); // FIXME: 
Unsupported by Metal - //hi = subgroupInclusiveMax(hi); - //slo = subgroupInclusiveMin(slo); - //shi = subgroupInclusiveMax(shi); - //ulo = subgroupInclusiveMin(ulo); - //uhi = subgroupInclusiveMax(uhi); - //anded = subgroupInclusiveAnd(anded); - //ored = subgroupInclusiveOr(ored); - //xored = subgroupInclusiveXor(ored); - //added = subgroupExclusiveAdd(lo); - - added = subgroupExclusiveAdd(multiplied); - multiplied = subgroupExclusiveMul(multiplied); - iadded = subgroupExclusiveAdd(imultiplied); - imultiplied = subgroupExclusiveMul(imultiplied); - //lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal - //hi = subgroupExclusiveMax(hi); - //ulo = subgroupExclusiveMin(ulo); - //uhi = subgroupExclusiveMax(uhi); - //slo = subgroupExclusiveMin(slo); - //shi = subgroupExclusiveMax(shi); - //anded = subgroupExclusiveAnd(anded); - //ored = subgroupExclusiveOr(ored); - //xored = subgroupExclusiveXor(ored); - - // clustered - added = subgroupClusteredAdd(added, 1u); - multiplied = subgroupClusteredMul(multiplied, 1u); - iadded = subgroupClusteredAdd(iadded, 1u); - imultiplied = subgroupClusteredMul(imultiplied, 1u); - lo = subgroupClusteredMin(lo, 1u); - hi = subgroupClusteredMax(hi, 1u); - ulo = subgroupClusteredMin(ulo, 1u); - uhi = subgroupClusteredMax(uhi, 1u); - slo = subgroupClusteredMin(slo, 1u); - shi = subgroupClusteredMax(shi, 1u); - anded = subgroupClusteredAnd(anded, 1u); - ored = subgroupClusteredOr(ored, 1u); - xored = subgroupClusteredXor(xored, 1u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 1u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 1u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 1u); - - added = subgroupClusteredAdd(added, 2u); - multiplied = subgroupClusteredMul(multiplied, 2u); - iadded = subgroupClusteredAdd(iadded, 2u); - imultiplied = subgroupClusteredMul(imultiplied, 2u); - lo = subgroupClusteredMin(lo, 2u); - hi = subgroupClusteredMax(hi, 2u); - ulo = subgroupClusteredMin(ulo, 2u); - uhi = 
subgroupClusteredMax(uhi, 2u); - slo = subgroupClusteredMin(slo, 2u); - shi = subgroupClusteredMax(shi, 2u); - anded = subgroupClusteredAnd(anded, 2u); - ored = subgroupClusteredOr(ored, 2u); - xored = subgroupClusteredXor(xored, 2u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 2u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 2u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 2u); - - added = subgroupClusteredAdd(added, 4u); - multiplied = subgroupClusteredMul(multiplied, 4u); - iadded = subgroupClusteredAdd(iadded, 4u); - imultiplied = subgroupClusteredMul(imultiplied, 4u); - lo = subgroupClusteredMin(lo, 4u); - hi = subgroupClusteredMax(hi, 4u); - ulo = subgroupClusteredMin(ulo, 4u); - uhi = subgroupClusteredMax(uhi, 4u); - slo = subgroupClusteredMin(slo, 4u); - shi = subgroupClusteredMax(shi, 4u); - anded = subgroupClusteredAnd(anded, 4u); - ored = subgroupClusteredOr(ored, 4u); - xored = subgroupClusteredXor(xored, 4u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 4u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 4u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 4u); - - added = subgroupClusteredAdd(added, 16u); - multiplied = subgroupClusteredMul(multiplied, 16u); - iadded = subgroupClusteredAdd(iadded, 16u); - imultiplied = subgroupClusteredMul(imultiplied, 16u); - lo = subgroupClusteredMin(lo, 16u); - hi = subgroupClusteredMax(hi, 16u); - ulo = subgroupClusteredMin(ulo, 16u); - uhi = subgroupClusteredMax(uhi, 16u); - slo = subgroupClusteredMin(slo, 16u); - shi = subgroupClusteredMax(shi, 16u); - anded = subgroupClusteredAnd(anded, 16u); - ored = subgroupClusteredOr(ored, 16u); - xored = subgroupClusteredXor(xored, 16u); - - anded_b = subgroupClusteredAnd(equal(anded, uvec4(2u)), 16u); - ored_b = subgroupClusteredOr(equal(ored, uvec4(3u)), 16u); - xored_b = subgroupClusteredXor(equal(xored, uvec4(4u)), 16u); - - // quad - vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0)); - 
bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true)); - vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0)); - bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true)); - vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0)); - bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true)); - vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u); - bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u); -} diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index ed7cfe633..806bf7497 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -177,6 +177,22 @@ void CompilerOpenCL::emit_header() statement("#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); if (opencl_options.enable_64bit_atomics && opencl_options.opencl_version >= 200) statement("#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable"); + if (opencl_options.enable_subgroups) + statement("#pragma OPENCL EXTENSION cl_khr_subgroups : enable"); + if (needs_subgroup_vote) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable"); + if (needs_subgroup_ballot) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable"); + if (needs_subgroup_arithmetic) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable"); + if (needs_subgroup_shuffle) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable"); + if (needs_subgroup_shuffle_relative) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable"); + if (needs_subgroup_clustered) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable"); + if (needs_subgroup_rotate) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable"); statement(""); // Emit FP_CONTRACT pragma based on ContractionOff execution mode and FPFastMathDefault. 
@@ -1071,10 +1087,66 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInGlobalSize: return "((uint3)(get_global_size(0), get_global_size(1), get_global_size(2)))"; case BuiltInNumSubgroups: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + return "get_num_sub_groups()"; case BuiltInSubgroupId: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + return "get_sub_group_id()"; case BuiltInSubgroupSize: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + return "get_sub_group_size()"; case BuiltInSubgroupLocalInvocationId: - SPIRV_CROSS_THROW("OpenCL subgroup builtins not yet implemented."); + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + return "get_sub_group_local_id()"; + case BuiltInSubgroupEqMask: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + return "get_sub_group_eq_mask()"; + case BuiltInSubgroupGeMask: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + return "get_sub_group_ge_mask()"; + case BuiltInSubgroupGtMask: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + return "get_sub_group_gt_mask()"; + case BuiltInSubgroupLeMask: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + if (!needs_subgroup_ballot) + { + 
needs_subgroup_ballot = true; + force_recompile(); + } + return "get_sub_group_le_mask()"; + case BuiltInSubgroupLtMask: + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + return "get_sub_group_lt_mask()"; default: SPIRV_CROSS_THROW("Unsupported builtin for OpenCL compute shader."); } @@ -3183,6 +3255,333 @@ void CompilerOpenCL::emit_block_hints(const SPIRBlock &) // OpenCL C has no control-flow hint attributes; suppress SPIRV_CROSS_BRANCH/FLATTEN etc. } +// Emit a unary subgroup op, decomposing vectors into per-component calls. +// For scalars, emits: func(val) +// For vectors, emits: (vectype)(func(val.x), func(val.y), ...) +void CompilerOpenCL::emit_subgroup_op_vec(uint32_t result_type, uint32_t id, uint32_t value_id, const char *func_name) +{ + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string expr = "(" + type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + expr += join(func_name, "(", to_enclosed_expression(value_id), ".", "xyzw"[c], ")"); + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + emit_unary_func_op(result_type, id, value_id, func_name); + } +} + +// Emit a binary subgroup op (value + extra arg like cluster size), decomposing vectors. +// For scalars, emits: func(val, extra) +// For vectors, emits: (vectype)(func(val.x, extra), func(val.y, extra), ...) 
+void CompilerOpenCL::emit_subgroup_op_vec_binary(uint32_t result_type, uint32_t id, uint32_t value_id, + uint32_t extra_id, const char *func_name) +{ + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string extra_expr = to_expression(extra_id); + string expr = "(" + type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + expr += join(func_name, "(", to_enclosed_expression(value_id), ".", "xyzw"[c], ", ", extra_expr, ")"); + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + emit_binary_func_op(result_type, id, value_id, extra_id, func_name); + } +} + +void CompilerOpenCL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup operations require enable_subgroups option."); + + // Validate scope is Subgroup + if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR) + { + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + // Helper to set an extension flag and trigger recompile if newly needed. 
+ auto require_extension = [this](bool &flag) + { + if (!flag) + { + flag = true; + force_recompile(); + } + }; + + switch (op) + { + // === Task 5: cl_khr_subgroup_non_uniform_vote === + + case OpGroupNonUniformElect: + require_extension(needs_subgroup_vote); + emit_op(result_type, id, "sub_group_elect()", true); + break; + + case OpGroupNonUniformAllEqual: + { + require_extension(needs_subgroup_vote); + auto &type = expression_type(ops[3]); + if (type.vecsize > 1) + { + // OpenCL sub_group_non_uniform_all_equal only accepts scalars. + // For vectors, decompose into per-component calls combined with &&. + string expr; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += " && "; + string component = join(to_enclosed_expression(ops[3]), ".", "xyzw"[c]); + expr += join("sub_group_non_uniform_all_equal(", component, ")"); + } + emit_op(result_type, id, expr, should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_unary_func_op(result_type, id, ops[3], "sub_group_non_uniform_all_equal"); + } + break; + } + + // === Task 4: cl_khr_subgroups (base) — vote/broadcast === + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "sub_group_all"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "sub_group_any"); + break; + + case OpGroupNonUniformBroadcast: + emit_subgroup_op_vec_binary(result_type, id, ops[3], ops[4], "sub_group_broadcast"); + break; + + // === Task 6: cl_khr_subgroup_ballot === + + case OpGroupNonUniformBroadcastFirst: + require_extension(needs_subgroup_ballot); + emit_subgroup_op_vec(result_type, id, ops[3], "sub_group_broadcast_first"); + break; + + case OpGroupNonUniformBallot: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot"); + break; + + case OpGroupNonUniformInverseBallot: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], 
"sub_group_inverse_ballot"); + break; + + case OpGroupNonUniformBallotBitExtract: + require_extension(needs_subgroup_ballot); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_ballot_bit_extract"); + break; + + case OpGroupNonUniformBallotFindLSB: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_lsb"); + break; + + case OpGroupNonUniformBallotFindMSB: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_msb"); + break; + + case OpGroupNonUniformBallotBitCount: + { + require_extension(needs_subgroup_ballot); + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_bit_count"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_inclusive_scan"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_exclusive_scan"); + else + SPIRV_CROSS_THROW("Invalid group operation for BallotBitCount."); + break; + } + + // === Tasks 4/7/10: Arithmetic ops (Reduce/Scan/Clustered) === + // The same SPIR-V opcodes are used for base cl_khr_subgroups (Reduce/InclusiveScan/ExclusiveScan + // with add/min/max), cl_khr_subgroup_non_uniform_arithmetic (all ops with Reduce/Scan), + // and cl_khr_subgroup_clustered_reduce (ClusteredReduce). + + // clang-format off + // OpenCL subgroup functions are scalar-only; vectors are decomposed per-component + // via emit_subgroup_op_vec / emit_subgroup_op_vec_binary. 
+ +#define OPENCL_SUBGROUP_ARITH(spirv_op, base_name, nu_name) \ + case OpGroupNonUniform##spirv_op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_reduce_" base_name); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + require_extension(needs_subgroup_clustered); \ + emit_subgroup_op_vec_binary(result_type, id, ops[4], ops[5], "sub_group_clustered_reduce_" base_name); \ + } \ + else \ + SPIRV_CROSS_THROW("Unsupported group operation."); \ + break; \ + } + +#define OPENCL_SUBGROUP_ARITH_CAST(spirv_op, base_name, nu_name, cast_type) \ + case OpGroupNonUniform##spirv_op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_reduce_" base_name, cast_type, cast_type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name, cast_type, cast_type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name, cast_type, cast_type); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + require_extension(needs_subgroup_clustered); \ + emit_subgroup_op_vec_binary(result_type, id, ops[4], ops[5], "sub_group_clustered_reduce_" base_name); \ + } \ + else \ + SPIRV_CROSS_THROW("Unsupported group operation."); \ + break; \ + } + + // Non-uniform arithmetic extension ops (mul, bitwise, logical) — always require the extension +#define OPENCL_SUBGROUP_ARITH_NU(spirv_op, nu_name) \ + case 
OpGroupNonUniform##spirv_op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + { \ + require_extension(needs_subgroup_arithmetic); \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_non_uniform_reduce_" nu_name); \ + } \ + else if (operation == GroupOperationInclusiveScan) \ + { \ + require_extension(needs_subgroup_arithmetic); \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_non_uniform_scan_inclusive_" nu_name); \ + } \ + else if (operation == GroupOperationExclusiveScan) \ + { \ + require_extension(needs_subgroup_arithmetic); \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_non_uniform_scan_exclusive_" nu_name); \ + } \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + require_extension(needs_subgroup_clustered); \ + emit_subgroup_op_vec_binary(result_type, id, ops[4], ops[5], "sub_group_clustered_reduce_" nu_name); \ + } \ + else \ + SPIRV_CROSS_THROW("Unsupported group operation."); \ + break; \ + } + + // add/min/max: base cl_khr_subgroups for Reduce/Scan, clustered for ClusteredReduce + OPENCL_SUBGROUP_ARITH(FAdd, "add", "add") + OPENCL_SUBGROUP_ARITH(IAdd, "add", "add") + OPENCL_SUBGROUP_ARITH(FMin, "min", "min") + OPENCL_SUBGROUP_ARITH(FMax, "max", "max") + OPENCL_SUBGROUP_ARITH_CAST(SMin, "min", "min", int_type) + OPENCL_SUBGROUP_ARITH_CAST(SMax, "max", "max", int_type) + OPENCL_SUBGROUP_ARITH_CAST(UMin, "min", "min", uint_type) + OPENCL_SUBGROUP_ARITH_CAST(UMax, "max", "max", uint_type) + + // mul/bitwise/logical: always require cl_khr_subgroup_non_uniform_arithmetic (or clustered) + OPENCL_SUBGROUP_ARITH_NU(FMul, "mul") + OPENCL_SUBGROUP_ARITH_NU(IMul, "mul") + OPENCL_SUBGROUP_ARITH_NU(BitwiseAnd, "and") + OPENCL_SUBGROUP_ARITH_NU(BitwiseOr, "or") + OPENCL_SUBGROUP_ARITH_NU(BitwiseXor, "xor") + OPENCL_SUBGROUP_ARITH_NU(LogicalAnd, "logical_and") + OPENCL_SUBGROUP_ARITH_NU(LogicalOr, "logical_or") + OPENCL_SUBGROUP_ARITH_NU(LogicalXor, "logical_xor") + +#undef 
OPENCL_SUBGROUP_ARITH +#undef OPENCL_SUBGROUP_ARITH_CAST +#undef OPENCL_SUBGROUP_ARITH_NU + // clang-format on + + // === Task 8: cl_khr_subgroup_shuffle === + + case OpGroupNonUniformShuffle: + require_extension(needs_subgroup_shuffle); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_shuffle"); + break; + + case OpGroupNonUniformShuffleXor: + require_extension(needs_subgroup_shuffle); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_shuffle_xor"); + break; + + // === Task 9: cl_khr_subgroup_shuffle_relative === + + case OpGroupNonUniformShuffleUp: + require_extension(needs_subgroup_shuffle_relative); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_shuffle_up"); + break; + + case OpGroupNonUniformShuffleDown: + require_extension(needs_subgroup_shuffle_relative); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_shuffle_down"); + break; + + // === Task 11: cl_khr_subgroup_rotate === + + case OpGroupNonUniformRotateKHR: + require_extension(needs_subgroup_rotate); + if (i.length > 5) + emit_trinary_func_op(result_type, id, ops[3], ops[4], ops[5], "sub_group_clustered_rotate"); + else + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_rotate"); + break; + + default: + SPIRV_CROSS_THROW("Unsupported subgroup op for OpenCL."); + } +} + void CompilerOpenCL::emit_specialization_constants_and_structs() { bool emitted = false; @@ -3851,28 +4250,50 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpControlBarrier: { // ops[0]=execution_scope, ops[1]=memory_scope, ops[2]=semantics + uint32_t execution_scope = evaluate_constant_u32(ops[0]); uint32_t semantics = evaluate_constant_u32(ops[2]); semantics = mask_relevant_memory_semantics(semantics); flush_control_dependent_expressions(current_emitting_block->self); flush_all_active_variables(); - // Emit memory fence before the execution barrier if needed - string fence_flags = opencl_mem_fence_flags(semantics); - if 
(semantics != 0) + if (execution_scope == ScopeSubgroup) { - if (opencl_options.supports_opencl_version(2, 0)) - statement("work_group_barrier(", fence_flags, ");"); + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup barriers require enable_subgroups option."); + + // Subgroup barrier with memory fence flags + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics == 0 || (semantics & all_barriers) == all_barriers) + { + statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + } else - statement("barrier(", fence_flags, ");"); + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("sub_group_barrier(", fence_flags, ");"); + } } else { - // Execution barrier with default local fence - if (opencl_options.supports_opencl_version(2, 0)) - statement("work_group_barrier(CLK_LOCAL_MEM_FENCE);"); + // Workgroup barrier + string fence_flags = opencl_mem_fence_flags(semantics); + if (semantics != 0) + { + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(", fence_flags, ");"); + else + statement("barrier(", fence_flags, ");"); + } else - statement("barrier(CLK_LOCAL_MEM_FENCE);"); + { + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(CLK_LOCAL_MEM_FENCE);"); + else + statement("barrier(CLK_LOCAL_MEM_FENCE);"); + } } break; } @@ -3880,6 +4301,7 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) case OpMemoryBarrier: { // ops[0]=memory_scope, ops[1]=semantics + uint32_t memory_scope = evaluate_constant_u32(ops[0]); uint32_t semantics = evaluate_constant_u32(ops[1]); semantics = mask_relevant_memory_semantics(semantics); @@ -3888,8 +4310,30 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) if (semantics != 0) { - string fence_flags = opencl_mem_fence_flags(semantics); - statement("mem_fence(", fence_flags, ");"); 
+ if (memory_scope == ScopeSubgroup) + { + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup memory barriers require enable_subgroups option."); + + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask; + + if ((semantics & all_barriers) == all_barriers || + (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))) + { + statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + } + else + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("sub_group_barrier(", fence_flags, ");"); + } + } + else + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("mem_fence(", fence_flags, ");"); + } } break; } diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index febc62bc9..4010997fe 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -141,6 +141,10 @@ class CompilerOpenCL : public CompilerGLSL void replace_illegal_names() override; void emit_function(SPIRFunction &func, const Bitset &return_flags) override; void emit_block_hints(const SPIRBlock &block) override; + void emit_subgroup_op(const Instruction &i) override; + void emit_subgroup_op_vec(uint32_t result_type, uint32_t id, uint32_t value_id, const char *func_name); + void emit_subgroup_op_vec_binary(uint32_t result_type, uint32_t id, uint32_t value_id, uint32_t extra_id, + const char *func_name); void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = "", uint32_t base_offset = 0) override; @@ -188,6 +192,16 @@ class CompilerOpenCL : public CompilerGLSL bool needs_inverse_3 = false; bool needs_inverse_4 = false; + // Subgroup extension requirements discovered during emit_subgroup_op / builtin_to_glsl. 
+ // These trigger force_recompile() so emit_header() can emit the correct pragmas. + bool needs_subgroup_vote = false; + bool needs_subgroup_ballot = false; + bool needs_subgroup_arithmetic = false; + bool needs_subgroup_shuffle = false; + bool needs_subgroup_shuffle_relative = false; + bool needs_subgroup_clustered = false; + bool needs_subgroup_rotate = false; + // Matrix type support: tracks which matrix signatures (basetype, vecsize, columns) are needed. struct MatrixTypeKey { diff --git a/test_shaders.py b/test_shaders.py index 9343d9a9d..5278a1b4a 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -585,30 +585,30 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati return (spirv_path, hlsl_path) -def path_to_opencl_standard(shader): - if '.cl30.' in shader: - return '-cl-std=CL3.0' - elif '.cl22.' in shader: - return '-cl-std=CL2.2' - elif '.cl21.' in shader: - return '-cl-std=CL2.1' - elif '.cl20.' in shader: - return '-cl-std=CL2.0' - else: - return '-cl-std=CL1.2' - def path_to_opencl_standard_cli(shader): + # clang seems warn about cl_khr_subgroups unless is specified. + # Revisit when OpenCL 3.0 support is no longer experimental. + if '.subgroups.' in shader: + return '200' + # OpenCL 3.0 support in clang is experimental and 2.1 and 2.2 seem unsupported. if '.cl30.' in shader: - return '300' + # return '300' + return '120' elif '.cl22.' in shader: - return '220' + # return '220' + return '200' elif '.cl21.' in shader: - return '210' + # return '210' + return '200' elif '.cl20.' 
in shader: return '200' else: return '120' +def path_to_opencl_standard(shader): + version = path_to_opencl_standard_cli(shader) + return f'-cl-std=CL{version[0]}.{version[1]}' + ignore_clang = False def validate_shader_opencl(shader, opt, paths): shader = reference_path(shader[0], shader[1], opt) @@ -633,14 +633,22 @@ def validate_shader_opencl(shader, opt, paths): global ignore_clang try: defines = ['-D' + ext for ext in extensions] + if extensions: + exts = ['-cl-ext=' + ','.join(['+' + ext for ext in extensions])] + else: + exts = [] + defines = ['-D' + ext for ext in extensions] version = path_to_opencl_standard_cli(shader) subprocess.check_call([paths.clang, '-Xclang', path_to_opencl_standard(shader), '-D__OPENCL_C_VERSION__=' + version, - '-D__OPENCL_VERSION__=' + version] + defines + + '-D__OPENCL_VERSION__=' + version] + defines + exts + [ '-emit-llvm', '-target', 'spir64-unknown-unknown', - '-Xclang', '-finclude-default-header', '-x', 'cl', '-c', shader]) + # clang may incorrectly claim that some extension pragmas are unnecessary + '-Wignored-pragmas', + '-Xclang', '-finclude-default-header', '-x', 'cl', '-c', shader, + '-o', os.devnull]) except OSError as oe: if (oe.errno != errno.ENOENT): # Ignore clang not found error From f19809203d5f3254805cdd60f4d2d5ec832dcdeb Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Mon, 16 Mar 2026 15:48:18 -0700 Subject: [PATCH 11/16] OpenCL: more test fixes --- .../asm/comp/bda-arguments.asm.comp | 2 +- .../storage-buffer-pointer-argument.asm.comp | 2 +- ...riable-ssbo-array-argument.spv16.asm.comp} | 4 +- ...tier-1.device-argument-buffer.invalid.comp | 23 ------ .../comp/bitcast-16bit-1.fp16.invalid.comp | 29 ++++++++ .../comp/bitcast-16bit-1.invalid.comp | 0 .../comp/bitcast-16bit-2.fp16.invalid.comp | 40 +++++++++++ .../comp/bitcast-16bit-2.invalid.comp | 0 ...riable-ssbo-array-argument.spv16.asm.comp} | 0 ...tier-1.device-argument-buffer.invalid.comp | 9 --- ...comp => bitcast-16bit-1.fp16.invalid.comp} | 0 
...comp => bitcast-16bit-2.fp16.invalid.comp} | 0 spirv_opencl.cpp | 70 ++++++++++++++++++- test_shaders.py | 2 +- 14 files changed, 141 insertions(+), 40 deletions(-) rename reference/shaders-opencl-no-opt/asm/comp/{variable-ssbo-array-argument.spv16.invalid.asm.comp => variable-ssbo-array-argument.spv16.asm.comp} (76%) delete mode 100644 reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp create mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp delete mode 100644 reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp rename shaders-opencl-no-opt/asm/comp/{variable-ssbo-array-argument.spv16.invalid.asm.comp => variable-ssbo-array-argument.spv16.asm.comp} (100%) delete mode 100644 shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp rename shaders-opencl-no-opt/comp/{bitcast-16bit-1.invalid.comp => bitcast-16bit-1.fp16.invalid.comp} (100%) rename shaders-opencl-no-opt/comp/{bitcast-16bit-2.invalid.comp => bitcast-16bit-2.fp16.invalid.comp} (100%) diff --git a/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp index e927b1917..ee615ca3d 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/bda-arguments.asm.comp @@ -36,6 +36,6 @@ __kernel void comp_main(_16 _32) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); __global _4* _28 = ((__global _4*)(_32._m0)); - _40(_28, 40, &_28->_m0, &_28->_m1, ((__global int*)(_28->_m1))); + _40(_28, 40, &_28->_m0, (__global int* __global *)&_28->_m1, ((__global 
int*)(_28->_m1))); } diff --git a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp index 9a56784a5..1ea3ba362 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/storage-buffer-pointer-argument.asm.comp @@ -15,7 +15,7 @@ struct SSBORead typedef struct SSBORead SSBORead; -void copy_out(__global float* A_1, __global float* B_1) +void copy_out(__global float* A_1, const __global float* B_1) { *A_1 = *B_1; } diff --git a/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.asm.comp similarity index 76% rename from reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp rename to reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.asm.comp index 2b20027cd..e4d6b5107 100644 --- a/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp +++ b/reference/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.asm.comp @@ -8,7 +8,7 @@ struct _3 typedef struct _3 _3; -void _20(__global uchar* _21[16]) +void _20(__global uchar (*_21)[16]) { (*_21)[2u] = (uchar)(0); } @@ -16,6 +16,6 @@ void _20(__global uchar* _21[16]) __attribute__((reqd_work_group_size(16, 1, 1))) __kernel void comp_main(__global uchar* _2) { - _20(&_2[0]); + _20((__global uchar (*)[16])&_2[0]); } diff --git a/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp b/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp deleted file mode 100644 index 497606109..000000000 --- 
a/reference/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp +++ /dev/null @@ -1,23 +0,0 @@ -// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) - - -struct D -{ - float data_d[1]; -}; - -typedef struct D D; - -struct A -{ - float data_a[1]; -}; - -typedef struct A A; - -__attribute__((reqd_work_group_size(1, 1, 1))) -__kernel void comp_main(__global float* d, __global const float* a) -{ - d[0][0] = a[0][0]; -} - diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp new file mode 100644 index 000000000..c01ac818c --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp @@ -0,0 +1,29 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO0 +{ + short4 inputs[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct SSBO1 +{ + int4 outputs[1]; +}; + +typedef struct SSBO1 SSBO1; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global short4* _25, __global int4* _39) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + half2 a = as_half2(_25[ident].xy); + _39[ident][0u] = as_int(as_uint(a + (half2)(half(1.0)))); + _39[ident][1u] = as_int(_25[ident].zw); + _39[ident][2u] = as_int(as_uint(as_ushort2(_25[ident].xy))); +} + diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp new file mode 100644 index 000000000..bca3c2996 --- /dev/null +++ 
b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp @@ -0,0 +1,40 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +struct SSBO1 +{ + short4 outputs[1]; +}; + +typedef struct SSBO1 SSBO1; + +struct SSBO0 +{ + int4 inputs[1]; +}; + +typedef struct SSBO0 SSBO0; + +struct UBO +{ + half4 const0; +}; + +typedef struct UBO UBO; + +__attribute__((reqd_work_group_size(1, 1, 1))) +__kernel void comp_main(__global short4* _21, __global int4* _29, UBO _40) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint ident_1 = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; + int _33 = _29[ident_1][0u]; + short2 _47 = as_short2(_33) + as_short2(_40.const0.xy); + _21[ident_1][0u] = _47.x; + _21[ident_1][1u] = _47.y; + int _57 = _29[ident_1][1u]; + short2 _67 = as_short2(as_ushort2(as_uint(_57)) - as_ushort2(_40.const0.zw)); + _21[ident_1][2u] = _67.x; + _21[ident_1][3u] = _67.y; +} + diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp deleted file mode 100644 index e69de29bb..000000000 diff --git a/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp b/shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.asm.comp similarity index 100% rename from shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.invalid.asm.comp rename to shaders-opencl-no-opt/asm/comp/variable-ssbo-array-argument.spv16.asm.comp diff --git a/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp b/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp deleted file mode 100644 index f5f05a1ae..000000000 --- 
a/shaders-opencl-no-opt/comp/argument-buffer-readonly-writeonly-alias.argument.argument-tier-1.device-argument-buffer.invalid.comp +++ /dev/null @@ -1,9 +0,0 @@ -#version 450 -#extension GL_EXT_nonuniform_qualifier : require - -layout (binding = 0) readonly buffer A {float data_a[];} a[]; -layout (binding = 0) writeonly buffer D {float data_d[];} d[]; - -void main() { - d[gl_WorkGroupID.x].data_d[0] = a[gl_WorkGroupID.x].data_a[0]; -} diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bitcast-16bit-1.invalid.comp rename to shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bitcast-16bit-2.invalid.comp rename to shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 806bf7497..6b902517d 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -2822,7 +2822,43 @@ std::string CompilerOpenCL::to_func_call_arg(const SPIRFunction::Parameter &call } } - return CompilerGLSL::to_func_call_arg(callee_param, id); + // If callee expects a pointer-to-array (e.g., __global uchar (*)[16]) but we have a flat + // pointer (e.g., from a flattened SSBO), cast the argument to the expected type. + auto &callee_type = expression_type(callee_param.id); + if (is_pointer(callee_type) && !callee_type.array.empty()) + { + auto addr_space = get_type_address_space(callee_type, callee_param.id); + const auto *pointee = &get(callee_type.parent_type); + while (is_pointer(*pointee)) + pointee = &get(pointee->parent_type); + string base = type_to_glsl(*pointee, callee_param.id); + string array_dims = type_to_array_glsl(callee_type, callee_param.id); + string cast_type = (!addr_space.empty() ? 
addr_space + " " : "") + base + " (*)" + array_dims; + return join("(", cast_type, ")", to_pointer_expression(id)); + } + + // Get the base class result (handles to_pointer_expression for buffer/physical pointers). + auto result = CompilerGLSL::to_func_call_arg(callee_param, id); + + // BDA pointer-to-pointer mismatch: struct members store BDA pointers as ulong + // (emit_struct_member), so taking &member gives ulong* in C, not the expected + // pointer-to-pointer type. Add a cast to the callee's parameter type. + // Skip function parameters — they already have the correct pointer type. + if (is_pointer(callee_type) && callee_type.storage == StorageClassPhysicalStorageBuffer) + { + auto &pointee = get(callee_type.parent_type); + if (is_pointer(pointee) && pointee.storage == StorageClassPhysicalStorageBuffer) + { + auto *var = maybe_get(id); + if (!var || !var->parameter) + { + auto cast_type = type_to_glsl(callee_type, callee_param.id); + return join("(", cast_type, ")", result); + } + } + } + + return result; } std::string CompilerOpenCL::entry_point_args(bool append_comma) @@ -2986,8 +3022,36 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r // OpenCL C has no in/out/inout qualifiers — skip direction prefix from argument_decl. auto &arg_type = expression_type(arg.id); - decl += to_qualifiers_glsl(arg.id); - decl += variable_decl(arg_type, to_name(arg.id), arg.id); + + // For StorageBuffer/Uniform pointer params that are never written, add const + // to match the constness of NonWritable kernel parameters at call sites. + bool is_readonly_ptr = + is_pointer(arg_type) && arg.write_count == 0 && + (arg_type.storage == StorageClassStorageBuffer || arg_type.storage == StorageClassUniform); + + // Pointer-to-array parameters need special C syntax: "T (*name)[N]" not "T* name[N]". + // "T* name[N]" in C means "array of N pointers to T", which is wrong. 
+ if (is_pointer(arg_type) && !arg_type.array.empty()) + { + auto addr_space = get_type_address_space(arg_type, arg.id); + const auto *pointee = &get(arg_type.parent_type); + while (is_pointer(*pointee)) + pointee = &get(pointee->parent_type); + string base = type_to_glsl(*pointee, arg.id); + string restrict_kw = to_restrict(arg.id, true); + if (!addr_space.empty()) + decl += addr_space + " "; + if (is_readonly_ptr) + decl += "const "; + decl += base + " (*" + restrict_kw + to_name(arg.id) + ")" + type_to_array_glsl(arg_type, arg.id); + } + else + { + if (is_readonly_ptr) + decl += "const "; + decl += to_qualifiers_glsl(arg.id); + decl += variable_decl(arg_type, to_name(arg.id), arg.id); + } if (&arg != &func.arguments.back()) decl += ", "; diff --git a/test_shaders.py b/test_shaders.py index 5278a1b4a..309fefd10 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -646,7 +646,7 @@ def validate_shader_opencl(shader, opt, paths): [ '-emit-llvm', '-target', 'spir64-unknown-unknown', # clang may incorrectly claim that some extension pragmas are unnecessary - '-Wignored-pragmas', + '-Wno-ignored-pragmas', '-Xclang', '-finclude-default-header', '-x', 'cl', '-c', shader, '-o', os.devnull]) From 0c6ec1ae102266db1cfe27ad757adba5e495352c Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Mon, 16 Mar 2026 17:19:33 -0700 Subject: [PATCH 12/16] OpenCL: fixing 16-bit float tests; suppress cl_khr_3d_image_writes pragma --- ...fp16.invalid.comp => bitcast-16bit-1.fp16.comp} | 2 +- ...fp16.invalid.comp => bitcast-16bit-2.fp16.comp} | 0 .../comp/{int64.invalid.comp => int64.comp} | 0 ...comp => struct-packing-scalar.nocompat.vk.comp} | 0 ...subgroups-arithmetic.nocompat.vk.subgroups.comp | 1 - .../subgroups-ballot.nocompat.vk.subgroups.comp | 1 - .../subgroups-basic.nocompat.vk.subgroups.comp | 1 - .../subgroups-clustered.nocompat.vk.subgroups.comp | 1 - .../subgroups-rotate.nocompat.vk.subgroups.comp | 1 - ...ups-shuffle-relative.nocompat.vk.subgroups.comp | 1 - 
.../subgroups-shuffle.nocompat.vk.subgroups.comp | 1 - .../comp/subgroups-vote.nocompat.vk.subgroups.comp | 1 - ...fp16.invalid.comp => bitcast-16bit-1.fp16.comp} | 0 ...fp16.invalid.comp => bitcast-16bit-2.fp16.comp} | 0 .../comp/{int64.invalid.comp => int64.comp} | 0 ...comp => struct-packing-scalar.nocompat.vk.comp} | 0 spirv_glsl.cpp | 14 ++++++++++---- spirv_opencl.cpp | 12 +++++++++++- spirv_opencl.hpp | 5 ++++- 19 files changed, 26 insertions(+), 15 deletions(-) rename reference/shaders-opencl-no-opt/comp/{bitcast-16bit-1.fp16.invalid.comp => bitcast-16bit-1.fp16.comp} (91%) rename reference/shaders-opencl-no-opt/comp/{bitcast-16bit-2.fp16.invalid.comp => bitcast-16bit-2.fp16.comp} (100%) rename reference/shaders-opencl-no-opt/comp/{int64.invalid.comp => int64.comp} (100%) rename reference/shaders-opencl-no-opt/comp/{struct-packing-scalar.nocompat.invalid.vk.comp => struct-packing-scalar.nocompat.vk.comp} (100%) rename shaders-opencl-no-opt/comp/{bitcast-16bit-1.fp16.invalid.comp => bitcast-16bit-1.fp16.comp} (100%) rename shaders-opencl-no-opt/comp/{bitcast-16bit-2.fp16.invalid.comp => bitcast-16bit-2.fp16.comp} (100%) rename shaders-opencl-no-opt/comp/{int64.invalid.comp => int64.comp} (100%) rename shaders-opencl-no-opt/comp/{struct-packing-scalar.nocompat.invalid.vk.comp => struct-packing-scalar.nocompat.vk.comp} (100%) diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.comp similarity index 91% rename from reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp rename to reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.comp index c01ac818c..0c6516642 100644 --- a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp +++ b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.comp @@ -22,7 +22,7 @@ __kernel void comp_main(__global short4* _25, __global int4* _39) uint3 spvWorkgroupSize = (uint3)(get_local_size(0), 
get_local_size(1), get_local_size(2)); uint ident = ((uint3)(get_global_id(0), get_global_id(1), get_global_id(2))).x; half2 a = as_half2(_25[ident].xy); - _39[ident][0u] = as_int(as_uint(a + (half2)(half(1.0)))); + _39[ident][0u] = as_int(as_uint(a + (half2)((half)(1.0)))); _39[ident][1u] = as_int(_25[ident].zw); _39[ident][2u] = as_int(as_uint(as_ushort2(_25[ident].xy))); } diff --git a/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp b/reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.comp similarity index 100% rename from reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp rename to reference/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.comp diff --git a/reference/shaders-opencl-no-opt/comp/int64.invalid.comp b/reference/shaders-opencl-no-opt/comp/int64.comp similarity index 100% rename from reference/shaders-opencl-no-opt/comp/int64.invalid.comp rename to reference/shaders-opencl-no-opt/comp/int64.comp diff --git a/reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.vk.comp similarity index 100% rename from reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to reference/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.vk.comp diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp index 916168719..be962897e 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic 
: enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp index c7d53554c..742c27dc9 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp index e4921be88..ab81e408a 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable struct SSBO diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp index 10a67ecce..9f44352d9 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp 
b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp index d97431603..c9462bad4 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp index 7c076e911..ddfb30d8c 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle-relative.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp index 5c032dda2..0910d63ef 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp 
b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp index 63276058c..7c872b2e7 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups.comp @@ -1,6 +1,5 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) -#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_subgroups : enable #pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.invalid.comp rename to shaders-opencl-no-opt/comp/bitcast-16bit-1.fp16.comp diff --git a/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp b/shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.comp similarity index 100% rename from shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.invalid.comp rename to shaders-opencl-no-opt/comp/bitcast-16bit-2.fp16.comp diff --git a/shaders-opencl-no-opt/comp/int64.invalid.comp b/shaders-opencl-no-opt/comp/int64.comp similarity index 100% rename from shaders-opencl-no-opt/comp/int64.invalid.comp rename to shaders-opencl-no-opt/comp/int64.comp diff --git a/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp b/shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.vk.comp similarity index 100% rename from shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.invalid.vk.comp rename to shaders-opencl-no-opt/comp/struct-packing-scalar.nocompat.vk.comp diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index e782463c7..bd2bd67a1 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -6350,11 +6350,14 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, type.columns = 1; if (float_value == numeric_limits::infinity()) - res = join(type_to_glsl(type), 
"(1.0 / 0.0)"); + res = backend.c_style_casts ? join("(", type_to_glsl(type), ")(1.0 / 0.0)") : + join(type_to_glsl(type), "(1.0 / 0.0)"); else if (float_value == -numeric_limits::infinity()) - res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + res = backend.c_style_casts ? join("(", type_to_glsl(type), ")(-1.0 / 0.0)") : + join(type_to_glsl(type), "(-1.0 / 0.0)"); else if (std::isnan(float_value)) - res = join(type_to_glsl(type), "(0.0 / 0.0)"); + res = backend.c_style_casts ? join("(", type_to_glsl(type), ")(0.0 / 0.0)") : + join(type_to_glsl(type), "(0.0 / 0.0)"); else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } @@ -6364,7 +6367,10 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; - res = join(type_to_glsl(type), "(", format_float(float_value), ")"); + if (backend.c_style_casts) + res = join("(", type_to_glsl(type), ")(", format_float(float_value), ")"); + else + res = join(type_to_glsl(type), "(", format_float(float_value), ")"); } return res; diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 6b902517d..0bab1dcec 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -169,7 +169,9 @@ void CompilerOpenCL::emit_header() statement("// Generated from SPIR-V by SPIRV-Cross (OpenCL backend)"); statement(""); - if (opencl_options.opencl_version >= 200) + // cl_khr_3d_image_writes is a core feature in OpenCL 2.x (no pragma needed). + // For OpenCL < 2.0 or >= 3.0, emit the pragma only when the shader writes to a 3D image. 
+ if (needs_3d_image_writes && (opencl_options.opencl_version < 200 || opencl_options.opencl_version >= 300)) statement("#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable"); if (opencl_options.enable_fp16) statement("#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); @@ -5298,6 +5300,14 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) uint32_t coord_id = ops[1]; uint32_t texel_id = ops[2]; + // Track if we write to a 3D image (needs cl_khr_3d_image_writes pragma). + auto &img_type = expression_type(image_id); + if (img_type.image.dim == Dim3D && !needs_3d_image_writes) + { + needs_3d_image_writes = true; + force_recompile(); + } + // Unset NonWritable so the variable can be written (mirroring GLSL backend). auto *image_var = maybe_get_backing_variable(image_id); if (image_var) diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 4010997fe..fc3962ddf 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -192,8 +192,11 @@ class CompilerOpenCL : public CompilerGLSL bool needs_inverse_3 = false; bool needs_inverse_4 = false; - // Subgroup extension requirements discovered during emit_subgroup_op / builtin_to_glsl. + // Extension requirements discovered during instruction emission. // These trigger force_recompile() so emit_header() can emit the correct pragmas. + bool needs_3d_image_writes = false; + + // Subgroup extension requirements discovered during emit_subgroup_op / builtin_to_glsl. bool needs_subgroup_vote = false; bool needs_subgroup_ballot = false; bool needs_subgroup_arithmetic = false; From 7969b8b90cfba3a07fb26a665d28542e2764dad7 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Tue, 17 Mar 2026 08:15:58 -0700 Subject: [PATCH 13/16] OpenCL: Support for subgroups emulation. 
--- main.cpp | 4 + spirv_cross_c.cpp | 3 + spirv_cross_c.h | 1 + spirv_opencl.cpp | 1797 +++++++++++++++++++++++++++++++++++++++++---- spirv_opencl.hpp | 16 + test_shaders.py | 16 +- 6 files changed, 1669 insertions(+), 168 deletions(-) diff --git a/main.cpp b/main.cpp index a53f5e758..bb8163b70 100644 --- a/main.cpp +++ b/main.cpp @@ -785,6 +785,7 @@ struct CLIArguments bool opencl_enable_subgroups_all = false; bool opencl_emulate_subgroups = false; uint32_t opencl_fixed_subgroup_size = 0; + uint32_t opencl_max_workgroup_size = 256; }; static void print_version() @@ -1371,6 +1372,7 @@ static string compile_iteration(const CLIArguments &args, std::vector ocl_opts.enable_subgroups_all = args.opencl_enable_subgroups_all; ocl_opts.emulate_subgroups = args.opencl_emulate_subgroups; ocl_opts.fixed_subgroup_size = args.opencl_fixed_subgroup_size; + ocl_opts.max_workgroup_size = args.opencl_max_workgroup_size; ocl_comp->set_opencl_options(ocl_opts); } else if (args.hlsl) @@ -2009,6 +2011,8 @@ static int main_inner(int argc, char *argv[]) cbs.add("--opencl-emulate-subgroups", [&args](CLIParser &) { args.opencl_emulate_subgroups = true; }); cbs.add("--opencl-fixed-subgroup-size", [&args](CLIParser &parser) { args.opencl_fixed_subgroup_size = parser.next_uint(); }); + cbs.add("--opencl-max-workgroup-size", + [&args](CLIParser &parser) { args.opencl_max_workgroup_size = parser.next_uint(); }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) diff --git a/spirv_cross_c.cpp b/spirv_cross_c.cpp index 1146f92d0..ede1f3f9a 100644 --- a/spirv_cross_c.cpp +++ b/spirv_cross_c.cpp @@ -833,6 +833,9 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_OPENCL_FIXED_SUBGROUP_SIZE: options->opencl.fixed_subgroup_size = value; break; + case SPVC_COMPILER_OPTION_OPENCL_MAX_WORKGROUP_SIZE: + 
options->opencl.max_workgroup_size = value; + break; #endif default: diff --git a/spirv_cross_c.h b/spirv_cross_c.h index e4d37ce46..b56a5635b 100644 --- a/spirv_cross_c.h +++ b/spirv_cross_c.h @@ -766,6 +766,7 @@ extern "C" SPVC_COMPILER_OPTION_OPENCL_ENABLE_SUBGROUPS_ALL = 100 | SPVC_COMPILER_OPTION_OPENCL_BIT, SPVC_COMPILER_OPTION_OPENCL_EMULATE_SUBGROUPS = 101 | SPVC_COMPILER_OPTION_OPENCL_BIT, SPVC_COMPILER_OPTION_OPENCL_FIXED_SUBGROUP_SIZE = 102 | SPVC_COMPILER_OPTION_OPENCL_BIT, + SPVC_COMPILER_OPTION_OPENCL_MAX_WORKGROUP_SIZE = 103 | SPVC_COMPILER_OPTION_OPENCL_BIT, SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 0bab1dcec..6bddfa00f 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -504,6 +504,9 @@ void CompilerOpenCL::compute_kernel_resources() func_workgroup_args[kv.first] = sorted; } } + + // Scan for subgroup emulation usage (which functions need scratch params threaded). + scan_subgroup_emulation_usage(); } void CompilerOpenCL::emit_resources() @@ -768,6 +771,9 @@ void CompilerOpenCL::emit_resources() statement(""); } + // Subgroup emulation helper functions. + emit_subgroup_emulation_helpers(); + // Default sampler for combined image+sampler usage (OpenCL C requires file-scope const sampler_t). if (needs_default_sampler) { @@ -996,6 +1002,10 @@ void CompilerOpenCL::emit_entry_point_declarations() } } + // Emit subgroup emulation local variables and scratch buffers. + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + emit_subgroup_emulation_entry_point_vars(); + // Materialize Input builtin variables as local variables. // In OpenCL C, builtins like get_global_id() are function calls, not variables. 
// When code needs variable pointers to these builtins (either threaded to non-entry @@ -1089,22 +1099,39 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInGlobalSize: return "((uint3)(get_global_size(0), get_global_size(1), get_global_size(2)))"; case BuiltInNumSubgroups: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + return "_spv_num_subgroups"; if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); return "get_num_sub_groups()"; case BuiltInSubgroupId: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + return "_spv_subgroup_id"; if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); return "get_sub_group_id()"; case BuiltInSubgroupSize: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + return "_spv_subgroup_size"; if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); return "get_sub_group_size()"; case BuiltInSubgroupLocalInvocationId: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + return "_spv_lane_id"; if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); return "get_sub_group_local_id()"; case BuiltInSubgroupEqMask: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_subgroup_eq_mask(_spv_lane_id)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1114,6 +1141,15 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "get_sub_group_eq_mask()"; case BuiltInSubgroupGeMask: + if 
(opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_subgroup_ge_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1123,6 +1159,15 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "get_sub_group_ge_mask()"; case BuiltInSubgroupGtMask: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_subgroup_gt_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1132,6 +1177,15 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "get_sub_group_gt_mask()"; case BuiltInSubgroupLeMask: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_subgroup_le_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1141,6 +1195,15 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "get_sub_group_le_mask()"; case BuiltInSubgroupLtMask: + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_subgroup_lt_mask(_spv_lane_id)"; + } if (!opencl_options.enable_subgroups) 
SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -3105,6 +3168,16 @@ void CompilerOpenCL::emit_function_prototype(SPIRFunction &func, const Bitset &r } } } + + // Thread subgroup emulation scratch buffers and emulation state. + if (needs_subgroup_emulation_scratch && funcs_using_subgroup_emulation.count(func.self)) + { + if (!first_resource) + decl += ", "; + first_resource = false; + decl += "__local uint* _spv_subgroup_scratch, uint _spv_linear_id, " + "uint _spv_subgroup_base, uint _spv_subgroup_size, uint _spv_lane_id"; + } } decl += ")"; @@ -3159,6 +3232,16 @@ void CompilerOpenCL::append_global_func_args(const SPIRFunction &func, uint32_t } } } + + // Thread subgroup emulation scratch buffers. + if (needs_subgroup_emulation_scratch && funcs_using_subgroup_emulation.count(func.self)) + { + arglist.push_back("_spv_subgroup_scratch"); + arglist.push_back("_spv_linear_id"); + arglist.push_back("_spv_subgroup_base"); + arglist.push_back("_spv_subgroup_size"); + arglist.push_back("_spv_lane_id"); + } } void CompilerOpenCL::emit_function(SPIRFunction &func, const Bitset &return_flags) @@ -3375,175 +3458,1536 @@ void CompilerOpenCL::emit_subgroup_op_vec_binary(uint32_t result_type, uint32_t } } -void CompilerOpenCL::emit_subgroup_op(const Instruction &i) +uint32_t CompilerOpenCL::get_emulation_max_workgroup_size() const { - const uint32_t *ops = stream(i); - auto op = static_cast(i.op); + auto &ep = get_entry_point(); + uint32_t x = ep.workgroup_size.x; + uint32_t y = ep.workgroup_size.y; + uint32_t z = ep.workgroup_size.z; + if (x != 0 && y != 0 && z != 0) + return x * y * z; + return opencl_options.max_workgroup_size; +} - if (!opencl_options.enable_subgroups) - SPIRV_CROSS_THROW("Subgroup operations require enable_subgroups option."); +string CompilerOpenCL::get_emulation_subgroup_size_expr() const +{ + if (opencl_options.fixed_subgroup_size != 0) + return to_string(opencl_options.fixed_subgroup_size) + "u"; + 
else + return "_spv_linear_workgroup_size"; +} - // Validate scope is Subgroup - if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR) - { - auto scope = static_cast(evaluate_constant_u32(ops[2])); - if (scope != ScopeSubgroup) - SPIRV_CROSS_THROW("Only subgroup scope is supported."); - } +string CompilerOpenCL::subgroup_emulation_scratch_type(bool is_64bit) const +{ + return is_64bit ? "ulong" : "uint"; +} - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; +void CompilerOpenCL::emit_subgroup_emulation_entry_point_vars() +{ + uint32_t fixed = opencl_options.fixed_subgroup_size; - // If we need to do implicit bitcasts, make sure we do it with the correct type. - uint32_t integer_width = get_integer_width_for_instruction(i); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); + // Linear ID computation + statement("uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2);"); + statement("uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + " + "(get_local_id(1) * get_local_size(0)) + get_local_id(0);"); - // Helper to set an extension flag and trigger recompile if newly needed. 
- auto require_extension = [this](bool &flag) + if (fixed == 0) { - if (!flag) - { - flag = true; - force_recompile(); - } - }; + // subgroup_size == workgroup size (one big subgroup) + statement("uint _spv_subgroup_size = _spv_linear_workgroup_size;"); + statement("uint _spv_lane_id = _spv_linear_id;"); + statement("uint _spv_subgroup_id = 0u;"); + statement("uint _spv_num_subgroups = 1u;"); + statement("uint _spv_subgroup_base = 0u;"); + } + else if (fixed == 1) + { + // Degenerate: each invocation is its own subgroup + statement("uint _spv_subgroup_size = 1u;"); + statement("uint _spv_lane_id = 0u;"); + statement("uint _spv_subgroup_id = _spv_linear_id;"); + statement("uint _spv_num_subgroups = _spv_linear_workgroup_size;"); + statement("uint _spv_subgroup_base = _spv_linear_id;"); + } + else + { + statement("uint _spv_subgroup_size = ", fixed, "u;"); + statement("uint _spv_lane_id = _spv_linear_id % ", fixed, "u;"); + statement("uint _spv_subgroup_id = _spv_linear_id / ", fixed, "u;"); + statement("uint _spv_num_subgroups = _spv_linear_workgroup_size / ", fixed, "u;"); + statement("uint _spv_subgroup_base = _spv_subgroup_id * ", fixed, "u;"); + } - switch (op) + // Scratch buffers (only when needed) + if (needs_subgroup_emulation_scratch) { - // === Task 5: cl_khr_subgroup_non_uniform_vote === + uint32_t max_wg = get_emulation_max_workgroup_size(); + statement("__local uint _spv_subgroup_scratch[", max_wg, "];"); + } + if (needs_subgroup_emulation_scratch64) + { + uint32_t max_wg = get_emulation_max_workgroup_size(); + statement("__local ulong _spv_subgroup_scratch64[", max_wg, "];"); + } +} - case OpGroupNonUniformElect: - require_extension(needs_subgroup_vote); - emit_op(result_type, id, "sub_group_elect()", true); - break; +void CompilerOpenCL::scan_subgroup_emulation_usage() +{ + if (!opencl_options.emulate_subgroups || opencl_options.enable_subgroups) + return; - case OpGroupNonUniformAllEqual: + funcs_using_subgroup_emulation.clear(); + + // First pass: 
find functions that directly use subgroup ops. + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + auto insn_op = static_cast(insn.op); + if (insn_op >= OpGroupNonUniformElect && insn_op <= OpGroupNonUniformQuadSwap) + { + funcs_using_subgroup_emulation.insert(func_id); + return; + } + if (insn_op == OpGroupNonUniformRotateKHR || insn_op == OpGroupNonUniformQuadAllKHR || + insn_op == OpGroupNonUniformQuadAnyKHR) + { + funcs_using_subgroup_emulation.insert(func_id); + return; + } + } + } + }); + + // Propagate transitively through call graph. + bool changed = true; + while (changed) { - require_extension(needs_subgroup_vote); - auto &type = expression_type(ops[3]); - if (type.vecsize > 1) - { - // OpenCL sub_group_non_uniform_all_equal only accepts scalars. - // For vectors, decompose into per-component calls combined with &&. 
- string expr; - for (uint32_t c = 0; c < type.vecsize; c++) - { - if (c > 0) - expr += " && "; - string component = join(to_enclosed_expression(ops[3]), ".", "xyzw"[c]); - expr += join("sub_group_non_uniform_all_equal(", component, ")"); - } - emit_op(result_type, id, expr, should_forward(ops[3])); - inherit_expression_dependencies(id, ops[3]); - } - else - { - emit_unary_func_op(result_type, id, ops[3], "sub_group_non_uniform_all_equal"); - } - break; + changed = false; + ir.for_each_typed_id( + [&](uint32_t func_id, SPIRFunction &func) + { + if (func_id == ir.default_entry_point) + return; + if (funcs_using_subgroup_emulation.count(func_id)) + return; + for (auto block_id : func.blocks) + { + auto &block = get(block_id); + for (auto &insn : block.ops) + { + if (static_cast(insn.op) == OpFunctionCall) + { + const uint32_t *insn_ops = stream(insn); + uint32_t callee_id = insn_ops[2]; + if (funcs_using_subgroup_emulation.count(callee_id)) + { + funcs_using_subgroup_emulation.insert(func_id); + changed = true; + return; + } + } + } + } + }); } +} - // === Task 4: cl_khr_subgroups (base) — vote/broadcast === +void CompilerOpenCL::emit_subgroup_emulation_helpers() +{ + if (!opencl_options.emulate_subgroups || opencl_options.enable_subgroups) + return; + if (!needs_subgroup_emulation_scratch) + return; - case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "sub_group_all"); - break; + // Barrier helper name (OpenCL 1.2 vs 2.0) + const char *barrier_call = opencl_options.supports_opencl_version(2, 0) ? 
+ "work_group_barrier(CLK_LOCAL_MEM_FENCE)" : + "barrier(CLK_LOCAL_MEM_FENCE)"; + + // --- Broadcast --- + statement("static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, " + "uint src_lane, uint linear_id, uint subgroup_base) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = scratch[subgroup_base + src_lane];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "sub_group_any"); - break; + // --- BroadcastFirst (lane 0) --- + statement("static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, " + "uint linear_id, uint subgroup_base) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = scratch[subgroup_base];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBroadcast: - emit_subgroup_op_vec_binary(result_type, id, ops[3], ops[4], "sub_group_broadcast"); - break; + // --- Shuffle --- + statement("static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, " + "uint index, uint linear_id, uint subgroup_base) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = scratch[subgroup_base + index];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - // === Task 6: cl_khr_subgroup_ballot === + // --- ShuffleXor --- + statement("static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, " + "uint mask, uint lane_id, uint linear_id, uint subgroup_base) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = scratch[subgroup_base + (lane_id ^ mask)];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); 
- case OpGroupNonUniformBroadcastFirst: - require_extension(needs_subgroup_ballot); - emit_subgroup_op_vec(result_type, id, ops[3], "sub_group_broadcast_first"); - break; + // --- ShuffleUp --- + statement("static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, " + "uint delta, uint lane_id, uint linear_id, uint subgroup_base) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val;"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBallot: - require_extension(needs_subgroup_ballot); - emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot"); - break; + // --- ShuffleDown --- + statement("static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, " + "uint delta, uint lane_id, uint linear_id, uint subgroup_size) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = (lane_id + delta < subgroup_size) ? 
scratch[linear_id + delta] : val;"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformInverseBallot: - require_extension(needs_subgroup_ballot); - emit_unary_func_op(result_type, id, ops[3], "sub_group_inverse_ballot"); - break; + // --- Rotate --- + statement("static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, " + "uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBallotBitExtract: - require_extension(needs_subgroup_ballot); - emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_ballot_bit_extract"); - break; + // --- Clustered rotate --- + statement("static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, " + "uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base = (lane_id / cluster_size) * cluster_size;"); + statement( + " uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)];"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBallotFindLSB: - require_extension(needs_subgroup_ballot); - emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_lsb"); - break; + // --- Vote All --- + statement("static bool spv_emulate_all(__local uint* scratch, bool predicate, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = predicate ? 
1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = true;"); + statement(" for (uint i = 0u; i < subgroup_size; i++)"); + statement(" r = r && (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBallotFindMSB: - require_extension(needs_subgroup_ballot); - emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_msb"); - break; + // --- Vote Any --- + statement("static bool spv_emulate_any(__local uint* scratch, bool predicate, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = predicate ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < subgroup_size; i++)"); + statement(" r = r || (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - case OpGroupNonUniformBallotBitCount: - { - require_extension(needs_subgroup_ballot); - auto operation = static_cast(ops[3]); - if (operation == GroupOperationReduce) - emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_bit_count"); - else if (operation == GroupOperationInclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_inclusive_scan"); - else if (operation == GroupOperationExclusiveScan) - emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_exclusive_scan"); - else - SPIRV_CROSS_THROW("Invalid group operation for BallotBitCount."); - break; - } + // --- AllEqual --- + statement("static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val;"); + statement(" ", barrier_call, ";"); + statement(" uint first = scratch[subgroup_base];"); + statement(" bool r = true;"); + statement(" for (uint i = 1u; i < subgroup_size; i++)"); + 
statement(" r = r && (scratch[subgroup_base + i] == first);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - // === Tasks 4/7/10: Arithmetic ops (Reduce/Scan/Clustered) === - // The same SPIR-V opcodes are used for base cl_khr_subgroups (Reduce/InclusiveScan/ExclusiveScan - // with add/min/max), cl_khr_subgroup_non_uniform_arithmetic (all ops with Reduce/Scan), - // and cl_khr_subgroup_clustered_reduce (ClusteredReduce). + // --- Ballot --- + statement("static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = predicate ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" uint4 r = (uint4)(0u);"); + statement(" for (uint i = 0u; i < subgroup_size; i++) {"); + statement(" if (scratch[subgroup_base + i] != 0u) {"); + statement(" uint word = i / 32u;"); + statement(" uint bit = i % 32u;"); + statement(" if (word == 0u) r.x |= (1u << bit);"); + statement(" else if (word == 1u) r.y |= (1u << bit);"); + statement(" else if (word == 2u) r.z |= (1u << bit);"); + statement(" else r.w |= (1u << bit);"); + statement(" }"); + statement(" }"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); - // clang-format off - // OpenCL subgroup functions are scalar-only; vectors are decomposed per-component - // via emit_subgroup_op_vec / emit_subgroup_op_vec_binary. 
+ // --- Mask builtins (pure arithmetic, no scratch) --- + statement("static uint4 spv_subgroup_eq_mask(uint lane_id) {"); + statement(" uint4 r = (uint4)(0u);"); + statement(" uint word = lane_id / 32u;"); + statement(" uint bit = lane_id % 32u;"); + statement(" if (word == 0u) r.x = (1u << bit);"); + statement(" else if (word == 1u) r.y = (1u << bit);"); + statement(" else if (word == 2u) r.z = (1u << bit);"); + statement(" else r.w = (1u << bit);"); + statement(" return r;"); + statement("}"); + statement(""); -#define OPENCL_SUBGROUP_ARITH(spirv_op, base_name, nu_name) \ - case OpGroupNonUniform##spirv_op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation == GroupOperationReduce) \ - emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_reduce_" base_name); \ - else if (operation == GroupOperationInclusiveScan) \ - emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name); \ - else if (operation == GroupOperationExclusiveScan) \ - emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name); \ - else if (operation == GroupOperationClusteredReduce) \ - { \ - require_extension(needs_subgroup_clustered); \ - emit_subgroup_op_vec_binary(result_type, id, ops[4], ops[5], "sub_group_clustered_reduce_" base_name); \ - } \ - else \ - SPIRV_CROSS_THROW("Unsupported group operation."); \ - break; \ - } + statement("static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) {"); + statement(" uint4 r = (uint4)(0u);"); + statement(" for (uint i = lane_id; i < subgroup_size; i++) {"); + statement(" uint word = i / 32u;"); + statement(" uint bit = i % 32u;"); + statement(" if (word == 0u) r.x |= (1u << bit);"); + statement(" else if (word == 1u) r.y |= (1u << bit);"); + statement(" else if (word == 2u) r.z |= (1u << bit);"); + statement(" else r.w |= (1u << bit);"); + statement(" }"); + statement(" return r;"); + statement("}"); + statement(""); -#define OPENCL_SUBGROUP_ARITH_CAST(spirv_op, 
base_name, nu_name, cast_type) \ - case OpGroupNonUniform##spirv_op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation == GroupOperationReduce) \ - emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_reduce_" base_name, cast_type, cast_type); \ - else if (operation == GroupOperationInclusiveScan) \ - emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name, cast_type, cast_type); \ - else if (operation == GroupOperationExclusiveScan) \ - emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name, cast_type, cast_type); \ + statement("static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) {"); + statement(" return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size);"); + statement("}"); + statement(""); + + statement("static uint4 spv_subgroup_le_mask(uint lane_id, uint subgroup_size) {"); + statement(" return spv_subgroup_ge_mask(0u, lane_id + 1u);"); + statement("}"); + statement(""); + + statement("static uint4 spv_subgroup_lt_mask(uint lane_id) {"); + statement(" if (lane_id == 0u) return (uint4)(0u);"); + statement(" return spv_subgroup_ge_mask(0u, lane_id);"); + statement("}"); + statement(""); + + // Arithmetic reduce/scan helpers: one set per type+operation. + // Uint operations + auto emit_arith_set = [&](const char *type_name, const char *as_cast, const char *suffix, const char *op, + const char *identity, bool use_cast) + { + string cast_read = + use_cast ? join("as_", as_cast, "(scratch[subgroup_base + i])") : "scratch[subgroup_base + i]"; + string cast_first = use_cast ? join("as_", as_cast, "(scratch[subgroup_base])") : "scratch[subgroup_base]"; + string cast_write = use_cast ? join("as_uint(val)") : "val"; + string cast_clust = use_cast ? join("as_", as_cast, "(scratch[subgroup_base + cluster_base_in_sg + i])") : + "scratch[subgroup_base + cluster_base_in_sg + i]"; + string cast_clust_first = use_cast ? 
join("as_", as_cast, "(scratch[subgroup_base + cluster_base_in_sg])") : + "scratch[subgroup_base + cluster_base_in_sg]"; + + // Reduce + statement("static ", type_name, " spv_emulate_reduce_", suffix, "(__local uint* scratch, ", type_name, + " val, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", cast_first, ";"); + statement(" for (uint i = 1u; i < subgroup_size; i++)"); + statement(" r = r ", op, " ", cast_read, ";"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Inclusive scan + statement("static ", type_name, " spv_emulate_inclusive_scan_", suffix, "(__local uint* scratch, ", type_name, + " val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", cast_first, ";"); + statement(" for (uint i = 1u; i <= lane_id; i++)"); + statement(" r = r ", op, " ", cast_read, ";"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Exclusive scan + statement("static ", type_name, " spv_emulate_exclusive_scan_", suffix, "(__local uint* scratch, ", type_name, + " val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", identity, ";"); + statement(" for (uint i = 0u; i < lane_id; i++)"); + statement(" r = r ", op, " ", cast_read, ";"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Clustered reduce + statement("static ", type_name, " spv_emulate_clustered_reduce_", suffix, "(__local uint* scratch, ", type_name, + " val, uint cluster_size, uint lane_id, uint linear_id, 
uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size;"); + statement(" ", type_name, " r = ", cast_clust_first, ";"); + statement(" for (uint i = 1u; i < cluster_size; i++)"); + statement(" r = r ", op, " ", cast_clust, ";"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + }; + + // For min/max we need function-call style instead of operator + auto emit_arith_func_set = [&](const char *type_name, const char *as_cast, const char *suffix, + const char *func_name, const char *identity, bool use_cast) + { + string cast_read = + use_cast ? join("as_", as_cast, "(scratch[subgroup_base + i])") : "scratch[subgroup_base + i]"; + string cast_first = use_cast ? join("as_", as_cast, "(scratch[subgroup_base])") : "scratch[subgroup_base]"; + string cast_write = use_cast ? join("as_uint(val)") : "val"; + string cast_clust = use_cast ? join("as_", as_cast, "(scratch[subgroup_base + cluster_base_in_sg + i])") : + "scratch[subgroup_base + cluster_base_in_sg + i]"; + string cast_clust_first = use_cast ? 
join("as_", as_cast, "(scratch[subgroup_base + cluster_base_in_sg])") : + "scratch[subgroup_base + cluster_base_in_sg]"; + + // Reduce + statement("static ", type_name, " spv_emulate_reduce_", suffix, "(__local uint* scratch, ", type_name, + " val, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", cast_first, ";"); + statement(" for (uint i = 1u; i < subgroup_size; i++)"); + statement(" r = ", func_name, "(r, ", cast_read, ");"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Inclusive scan + statement("static ", type_name, " spv_emulate_inclusive_scan_", suffix, "(__local uint* scratch, ", type_name, + " val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", cast_first, ";"); + statement(" for (uint i = 1u; i <= lane_id; i++)"); + statement(" r = ", func_name, "(r, ", cast_read, ");"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Exclusive scan + statement("static ", type_name, " spv_emulate_exclusive_scan_", suffix, "(__local uint* scratch, ", type_name, + " val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" ", type_name, " r = ", identity, ";"); + statement(" for (uint i = 0u; i < lane_id; i++)"); + statement(" r = ", func_name, "(r, ", cast_read, ");"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Clustered reduce + statement("static ", type_name, " spv_emulate_clustered_reduce_", suffix, "(__local uint* scratch, ", type_name, + " val, uint cluster_size, uint 
lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = ", cast_write, ";"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size;"); + statement(" ", type_name, " r = ", cast_clust_first, ";"); + statement(" for (uint i = 1u; i < cluster_size; i++)"); + statement(" r = ", func_name, "(r, ", cast_clust, ");"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + }; + + // Integer arithmetic (uint) + emit_arith_set("uint", "uint", "add_uint", "+", "0u", false); + emit_arith_set("uint", "uint", "mul_uint", "*", "1u", false); + emit_arith_func_set("uint", "uint", "min_uint", "min", "UINT_MAX", false); + emit_arith_func_set("uint", "uint", "max_uint", "max", "0u", false); + emit_arith_set("uint", "uint", "and_uint", "&", "0xFFFFFFFFu", false); + emit_arith_set("uint", "uint", "or_uint", "|", "0u", false); + emit_arith_set("uint", "uint", "xor_uint", "^", "0u", false); + + // Integer arithmetic (int) — uses as_int/as_uint bitcasts + emit_arith_set("int", "int", "add_int", "+", "0", true); + emit_arith_set("int", "int", "mul_int", "*", "1", true); + emit_arith_func_set("int", "int", "min_int", "min", "INT_MAX", true); + emit_arith_func_set("int", "int", "max_int", "max", "INT_MIN", true); + emit_arith_set("int", "int", "and_int", "&", "as_int(0xFFFFFFFFu)", true); + emit_arith_set("int", "int", "or_int", "|", "0", true); + emit_arith_set("int", "int", "xor_int", "^", "0", true); + + // Float arithmetic — uses as_float/as_uint bitcasts + emit_arith_set("float", "float", "add_float", "+", "0.0f", true); + emit_arith_set("float", "float", "mul_float", "*", "1.0f", true); + emit_arith_func_set("float", "float", "min_float", "fmin", "INFINITY", true); + emit_arith_func_set("float", "float", "max_float", "fmax", "-INFINITY", true); + + // Logical operations (bool → uint mapping) + statement("static bool 
spv_emulate_reduce_logical_and(__local uint* scratch, bool val, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = true;"); + statement(" for (uint i = 0u; i < subgroup_size; i++)"); + statement(" r = r && (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = true;"); + statement(" for (uint i = 0u; i <= lane_id; i++)"); + statement(" r = r && (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = true;"); + statement(" for (uint i = 0u; i < lane_id; i++)"); + statement(" r = r && (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, " + "uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 
1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size;"); + statement(" bool r = true;"); + statement(" for (uint i = 0u; i < cluster_size; i++)"); + statement(" r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // logical_or + statement("static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < subgroup_size; i++)"); + statement(" r = r || (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i <= lane_id; i++)"); + statement(" r = r || (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 
1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < lane_id; i++)"); + statement(" r = r || (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, " + "uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size;"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < cluster_size; i++)"); + statement(" r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // logical_xor + statement("static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, " + "uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < subgroup_size; i++)"); + statement(" r = r != (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 
1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i <= lane_id; i++)"); + statement(" r = r != (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, " + "uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < lane_id; i++)"); + statement(" r = r != (scratch[subgroup_base + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, " + "uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) {"); + statement(" scratch[linear_id] = val ? 1u : 0u;"); + statement(" ", barrier_call, ";"); + statement(" uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size;"); + statement(" bool r = false;"); + statement(" for (uint i = 0u; i < cluster_size; i++)"); + statement(" r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u);"); + statement(" ", barrier_call, ";"); + statement(" return r;"); + statement("}"); + statement(""); + + // Ballot derived operations (pure arithmetic on uint4, no scratch needed) + statement("static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) {"); + statement(" uint word = lane_id / 32u;"); + statement(" uint bit = lane_id % 32u;"); + statement(" uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? 
ballot.z : ballot.w;"); + statement(" return (v & (1u << bit)) != 0u;"); + statement("}"); + statement(""); + + statement("static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) {"); + statement(" uint word = index / 32u;"); + statement(" uint bit = index % 32u;"); + statement(" uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w;"); + statement(" return (v & (1u << bit)) != 0u;"); + statement("}"); + statement(""); + + statement("static uint spv_popcount4(uint4 v) {"); + statement(" return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w);"); + statement("}"); + statement(""); + + statement("static uint spv_emulate_ballot_bit_count(uint4 ballot) {"); + statement(" return spv_popcount4(ballot);"); + statement("}"); + statement(""); + + statement("static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) {"); + statement(" uint4 masked = ballot;"); + statement(" uint word = lane_id / 32u;"); + statement(" uint bit = lane_id % 32u;"); + statement(" uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u);"); + statement(" if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; }"); + statement(" else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; }"); + statement(" else if (word == 2u) { masked.z &= mask; masked.w = 0u; }"); + statement(" else { masked.w &= mask; }"); + statement(" return spv_popcount4(masked);"); + statement("}"); + statement(""); + + statement("static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) {"); + statement(" if (lane_id == 0u) return 0u;"); + statement(" return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u);"); + statement("}"); + statement(""); + + statement("static uint spv_emulate_ballot_find_lsb(uint4 ballot) {"); + statement(" if (ballot.x != 0u) return (uint)clz(ballot.x & (0u - ballot.x));"); + statement(" if (ballot.y != 0u) return 32u + (uint)clz(ballot.y & (0u - ballot.y));"); + statement(" if (ballot.z != 0u) return 64u + (uint)clz(ballot.z & (0u - ballot.z));"); + statement(" if (ballot.w != 0u) return 96u + (uint)clz(ballot.w & (0u - ballot.w));"); + statement(" return ~0u;"); + statement("}"); + statement(""); + + statement("static uint spv_emulate_ballot_find_msb(uint4 ballot) {"); + statement(" if (ballot.w != 0u) return 127u - (uint)clz(ballot.w);"); + statement(" if (ballot.z != 0u) return 95u - (uint)clz(ballot.z);"); + statement(" if (ballot.y != 0u) return 63u - (uint)clz(ballot.y);"); + statement(" if (ballot.x != 0u) return 31u - (uint)clz(ballot.x);"); + statement(" return ~0u;"); + statement("}"); + statement(""); +} + +void CompilerOpenCL::emit_subgroup_op_emulated(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + // Validate scope is Subgroup + if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR) + { + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup 
scope is supported."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t fixed = opencl_options.fixed_subgroup_size; + + // Request scratch buffer (triggers recompile if first time). + auto require_scratch = [this]() + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + }; + + // Helper to get the as_uint cast for a value + auto to_uint_cast = [&](uint32_t value_id) -> string + { + auto &type = expression_type(value_id); + if (type.basetype == SPIRType::UInt) + return to_expression(value_id); + else if (type.basetype == SPIRType::Int) + return join("as_uint(", to_expression(value_id), ")"); + else if (type.basetype == SPIRType::Float) + return join("as_uint(", to_expression(value_id), ")"); + else if (type.basetype == SPIRType::Boolean) + return join("(", to_expression(value_id), " ? 1u : 0u)"); + return to_expression(value_id); + }; + + // Helper to wrap result with type cast from uint + auto from_uint_cast = [&](const string &expr, uint32_t value_id) -> string + { + auto &type = expression_type(value_id); + if (type.basetype == SPIRType::UInt) + return expr; + else if (type.basetype == SPIRType::Int) + return join("as_int(", expr, ")"); + else if (type.basetype == SPIRType::Float) + return join("as_float(", expr, ")"); + return expr; + }; + + // For emulated vector ops, decompose per-component calling the scalar helper. 
+ auto emit_emulated_vec = [&](uint32_t value_id, const string &scalar_call_prefix, const string &scalar_call_suffix) + { + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string expr = "(" + type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + string component = join(to_enclosed_expression(value_id), ".", "xyzw"[c]); + // Cast component to uint for the helper + string as_uint_comp; + if (type.basetype == SPIRType::UInt) + as_uint_comp = component; + else if (type.basetype == SPIRType::Int) + as_uint_comp = join("as_uint(", component, ")"); + else if (type.basetype == SPIRType::Float) + as_uint_comp = join("as_uint(", component, ")"); + else + as_uint_comp = component; + + string result_comp = scalar_call_prefix + as_uint_comp + scalar_call_suffix; + // Cast back from uint + if (type.basetype == SPIRType::Int) + result_comp = join("as_int(", result_comp, ")"); + else if (type.basetype == SPIRType::Float) + result_comp = join("as_float(", result_comp, ")"); + expr += result_comp; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + string result_expr = scalar_call_prefix + to_uint_cast(value_id) + scalar_call_suffix; + result_expr = from_uint_cast(result_expr, value_id); + emit_op(result_type, id, result_expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + }; + + switch (op) + { + case OpGroupNonUniformElect: + if (fixed == 1) + emit_op(result_type, id, "true", true); + else + emit_op(result_type, id, "(_spv_lane_id == 0u)", true); + break; + + case OpGroupNonUniformAll: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_op(result_type, id, + 
join("spv_emulate_all(_spv_subgroup_scratch, ", to_expression(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + break; + + case OpGroupNonUniformAny: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_op(result_type, id, + join("spv_emulate_any(_spv_subgroup_scratch, ", to_expression(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + break; + + case OpGroupNonUniformAllEqual: + { + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, "true", true); + } + else + { + emit_op(result_type, id, + join("spv_emulate_all_equal_uint(_spv_subgroup_scratch, ", to_uint_cast(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + break; + } + + case OpGroupNonUniformBroadcast: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + string src_lane = to_expression(ops[4]); + emit_emulated_vec(ops[3], "spv_emulate_broadcast_uint(_spv_subgroup_scratch, ", + join(", ", src_lane, ", _spv_linear_id, _spv_subgroup_base)")); + } + break; + + case OpGroupNonUniformBroadcastFirst: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_emulated_vec(ops[3], "spv_emulate_broadcast_first_uint(_spv_subgroup_scratch, ", + ", _spv_linear_id, _spv_subgroup_base)"); + } + break; + + case OpGroupNonUniformBallot: + require_scratch(); + if (fixed == 1) + { + 
emit_op(result_type, id, join("(", to_expression(ops[3]), " ? (uint4)(1u, 0u, 0u, 0u) : (uint4)(0u))"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_op(result_type, id, + join("spv_emulate_ballot(_spv_subgroup_scratch, ", to_expression(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + break; + + case OpGroupNonUniformInverseBallot: + require_scratch(); + emit_op(result_type, id, join("spv_emulate_inverse_ballot(", to_expression(ops[3]), ", _spv_lane_id)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotBitExtract: + require_scratch(); + emit_op(result_type, id, + join("spv_emulate_ballot_bit_extract(", to_expression(ops[3]), ", ", to_expression(ops[4]), ")"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotFindLSB: + require_scratch(); + emit_op(result_type, id, join("spv_emulate_ballot_find_lsb(", to_expression(ops[3]), ")"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotFindMSB: + require_scratch(); + emit_op(result_type, id, join("spv_emulate_ballot_find_msb(", to_expression(ops[3]), ")"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotBitCount: + { + require_scratch(); + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + { + emit_op(result_type, id, join("spv_emulate_ballot_bit_count(", to_expression(ops[4]), ")"), + should_forward(ops[4])); + inherit_expression_dependencies(id, ops[4]); + } + else if (operation == GroupOperationInclusiveScan) + { + emit_op(result_type, id, + join("spv_emulate_ballot_inclusive_bit_count(", to_expression(ops[4]), ", _spv_lane_id)"), + should_forward(ops[4])); + 
inherit_expression_dependencies(id, ops[4]); + } + else if (operation == GroupOperationExclusiveScan) + { + emit_op(result_type, id, + join("spv_emulate_ballot_exclusive_bit_count(", to_expression(ops[4]), ", _spv_lane_id)"), + should_forward(ops[4])); + inherit_expression_dependencies(id, ops[4]); + } + else + SPIRV_CROSS_THROW("Invalid group operation for BallotBitCount."); + break; + } + + case OpGroupNonUniformShuffle: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + string idx = to_expression(ops[4]); + emit_emulated_vec(ops[3], "spv_emulate_shuffle_uint(_spv_subgroup_scratch, ", + join(", ", idx, ", _spv_linear_id, _spv_subgroup_base)")); + } + break; + + case OpGroupNonUniformShuffleXor: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + string mask = to_expression(ops[4]); + emit_emulated_vec(ops[3], "spv_emulate_shuffle_xor_uint(_spv_subgroup_scratch, ", + join(", ", mask, ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base)")); + } + break; + + case OpGroupNonUniformShuffleUp: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + string delta = to_expression(ops[4]); + emit_emulated_vec(ops[3], "spv_emulate_shuffle_up_uint(_spv_subgroup_scratch, ", + join(", ", delta, ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base)")); + } + break; + + case OpGroupNonUniformShuffleDown: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + string delta = to_expression(ops[4]); + emit_emulated_vec(ops[3], 
"spv_emulate_shuffle_down_uint(_spv_subgroup_scratch, ", + join(", ", delta, ", _spv_lane_id, _spv_linear_id, _spv_subgroup_size)")); + } + break; + + case OpGroupNonUniformRotateKHR: + require_scratch(); + if (fixed == 1) + { + emit_op(result_type, id, to_enclosed_expression(ops[3]), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else if (i.length > 5) + { + // Clustered rotate + string delta = to_expression(ops[4]); + string cluster_size = to_expression(ops[5]); + emit_emulated_vec( + ops[3], "spv_emulate_clustered_rotate_uint(_spv_subgroup_scratch, ", + join(", ", delta, ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, ", cluster_size, ")")); + } + else + { + string delta = to_expression(ops[4]); + emit_emulated_vec( + ops[3], "spv_emulate_rotate_uint(_spv_subgroup_scratch, ", + join(", ", delta, ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)")); + } + break; + + // === Arithmetic ops (Reduce / InclusiveScan / ExclusiveScan / ClusteredReduce) === + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + { + require_scratch(); + auto operation = static_cast(ops[3]); + uint32_t value_id = ops[4]; + + if (fixed == 1) + { + // For subgroup_size==1: reduce/inclusive return val; exclusive returns identity. 
+ if (operation == GroupOperationExclusiveScan) + { + // Return the identity value for the operation + auto &type = get(result_type); + string identity; + switch (op) + { + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + identity = (type.basetype == SPIRType::Float) ? "0.0f" : "0"; + break; + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + identity = (type.basetype == SPIRType::Float) ? "1.0f" : "1"; + break; + case OpGroupNonUniformFMin: + identity = "INFINITY"; + break; + case OpGroupNonUniformFMax: + identity = "-INFINITY"; + break; + case OpGroupNonUniformSMin: + identity = "INT_MAX"; + break; + case OpGroupNonUniformSMax: + identity = "INT_MIN"; + break; + case OpGroupNonUniformUMin: + identity = "UINT_MAX"; + break; + case OpGroupNonUniformUMax: + identity = "0u"; + break; + case OpGroupNonUniformBitwiseAnd: + identity = (type.basetype == SPIRType::Int) ? "as_int(0xFFFFFFFFu)" : "0xFFFFFFFFu"; + break; + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + identity = (type.basetype == SPIRType::Int) ? 
"0" : "0u"; + break; + case OpGroupNonUniformLogicalAnd: + identity = "true"; + break; + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + identity = "false"; + break; + default: + identity = "0"; + break; + } + + if (type.vecsize > 1) + emit_op(result_type, id, join("(", type_to_glsl(type), ")(", identity, ")"), true); + else + emit_op(result_type, id, identity, true); + } + else + { + // Reduce, InclusiveScan, ClusteredReduce all return the value itself + emit_op(result_type, id, to_enclosed_expression(value_id), should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + break; + } + + // Determine the suffix for the helper function + const char *op_suffix = nullptr; + bool is_logical = false; + switch (op) + { + case OpGroupNonUniformFAdd: + op_suffix = "add_float"; + break; + case OpGroupNonUniformIAdd: + { + auto &type = expression_type(value_id); + op_suffix = (type.basetype == SPIRType::Int) ? "add_int" : "add_uint"; + break; + } + case OpGroupNonUniformFMul: + op_suffix = "mul_float"; + break; + case OpGroupNonUniformIMul: + { + auto &type = expression_type(value_id); + op_suffix = (type.basetype == SPIRType::Int) ? "mul_int" : "mul_uint"; + break; + } + case OpGroupNonUniformFMin: + op_suffix = "min_float"; + break; + case OpGroupNonUniformFMax: + op_suffix = "max_float"; + break; + case OpGroupNonUniformSMin: + op_suffix = "min_int"; + break; + case OpGroupNonUniformSMax: + op_suffix = "max_int"; + break; + case OpGroupNonUniformUMin: + op_suffix = "min_uint"; + break; + case OpGroupNonUniformUMax: + op_suffix = "max_uint"; + break; + case OpGroupNonUniformBitwiseAnd: + { + auto &type = expression_type(value_id); + op_suffix = (type.basetype == SPIRType::Int) ? "and_int" : "and_uint"; + break; + } + case OpGroupNonUniformBitwiseOr: + { + auto &type = expression_type(value_id); + op_suffix = (type.basetype == SPIRType::Int) ? 
"or_int" : "or_uint"; + break; + } + case OpGroupNonUniformBitwiseXor: + { + auto &type = expression_type(value_id); + op_suffix = (type.basetype == SPIRType::Int) ? "xor_int" : "xor_uint"; + break; + } + case OpGroupNonUniformLogicalAnd: + op_suffix = "logical_and"; + is_logical = true; + break; + case OpGroupNonUniformLogicalOr: + op_suffix = "logical_or"; + is_logical = true; + break; + case OpGroupNonUniformLogicalXor: + op_suffix = "logical_xor"; + is_logical = true; + break; + default: + SPIRV_CROSS_THROW("Unsupported arithmetic op for emulation."); + break; + } + + // Determine the group operation prefix + const char *group_prefix = nullptr; + switch (operation) + { + case GroupOperationReduce: + group_prefix = "reduce"; + break; + case GroupOperationInclusiveScan: + group_prefix = "inclusive_scan"; + break; + case GroupOperationExclusiveScan: + group_prefix = "exclusive_scan"; + break; + case GroupOperationClusteredReduce: + group_prefix = "clustered_reduce"; + break; + default: + SPIRV_CROSS_THROW("Unsupported group operation for emulation."); + break; + } + + string helper_name = join("spv_emulate_", group_prefix, "_", op_suffix); + + if (is_logical) + { + // Logical ops work on bool directly (scalar only) + string val_expr = to_expression(value_id); + string expr; + if (operation == GroupOperationClusteredReduce) + { + string cluster_size = to_expression(ops[5]); + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, ", ", cluster_size, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else if (operation == GroupOperationReduce) + { + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else + { + // inclusive/exclusive scan + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + emit_op(result_type, id, expr, should_forward(value_id)); + 
inherit_expression_dependencies(id, value_id); + } + else + { + // Arithmetic ops: use vector decomposition like native subgroup ops + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string expr = "(" + type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + string component = join(to_enclosed_expression(value_id), ".", "xyzw"[c]); + string call; + if (operation == GroupOperationClusteredReduce) + { + string cluster_size = to_expression(ops[5]); + call = join(helper_name, "(_spv_subgroup_scratch, ", component, ", ", cluster_size, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else if (operation == GroupOperationReduce) + { + call = join(helper_name, "(_spv_subgroup_scratch, ", component, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else + { + call = join(helper_name, "(_spv_subgroup_scratch, ", component, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + expr += call; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + string val_expr = to_expression(value_id); + string expr; + if (operation == GroupOperationClusteredReduce) + { + string cluster_size = to_expression(ops[5]); + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, ", ", cluster_size, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else if (operation == GroupOperationReduce) + { + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + else + { + expr = join(helper_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + emit_op(result_type, id, expr, should_forward(value_id)); + 
inherit_expression_dependencies(id, value_id); + } + } + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported subgroup op for OpenCL emulation."); + } +} + +void CompilerOpenCL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + { + emit_subgroup_op_emulated(i); + return; + } + + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup operations require enable_subgroups option."); + + // Validate scope is Subgroup + if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR) + { + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + // Helper to set an extension flag and trigger recompile if newly needed. + auto require_extension = [this](bool &flag) + { + if (!flag) + { + flag = true; + force_recompile(); + } + }; + + switch (op) + { + // === Task 5: cl_khr_subgroup_non_uniform_vote === + + case OpGroupNonUniformElect: + require_extension(needs_subgroup_vote); + emit_op(result_type, id, "sub_group_elect()", true); + break; + + case OpGroupNonUniformAllEqual: + { + require_extension(needs_subgroup_vote); + auto &type = expression_type(ops[3]); + if (type.vecsize > 1) + { + // OpenCL sub_group_non_uniform_all_equal only accepts scalars. + // For vectors, decompose into per-component calls combined with &&. 
+ string expr; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += " && "; + string component = join(to_enclosed_expression(ops[3]), ".", "xyzw"[c]); + expr += join("sub_group_non_uniform_all_equal(", component, ")"); + } + emit_op(result_type, id, expr, should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_unary_func_op(result_type, id, ops[3], "sub_group_non_uniform_all_equal"); + } + break; + } + + // === Task 4: cl_khr_subgroups (base) — vote/broadcast === + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "sub_group_all"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "sub_group_any"); + break; + + case OpGroupNonUniformBroadcast: + emit_subgroup_op_vec_binary(result_type, id, ops[3], ops[4], "sub_group_broadcast"); + break; + + // === Task 6: cl_khr_subgroup_ballot === + + case OpGroupNonUniformBroadcastFirst: + require_extension(needs_subgroup_ballot); + emit_subgroup_op_vec(result_type, id, ops[3], "sub_group_broadcast_first"); + break; + + case OpGroupNonUniformBallot: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot"); + break; + + case OpGroupNonUniformInverseBallot: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_inverse_ballot"); + break; + + case OpGroupNonUniformBallotBitExtract: + require_extension(needs_subgroup_ballot); + emit_binary_func_op(result_type, id, ops[3], ops[4], "sub_group_ballot_bit_extract"); + break; + + case OpGroupNonUniformBallotFindLSB: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_lsb"); + break; + + case OpGroupNonUniformBallotFindMSB: + require_extension(needs_subgroup_ballot); + emit_unary_func_op(result_type, id, ops[3], "sub_group_ballot_find_msb"); + break; + + case OpGroupNonUniformBallotBitCount: + { + 
require_extension(needs_subgroup_ballot); + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_bit_count"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_inclusive_scan"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "sub_group_ballot_exclusive_scan"); + else + SPIRV_CROSS_THROW("Invalid group operation for BallotBitCount."); + break; + } + + // === Tasks 4/7/10: Arithmetic ops (Reduce/Scan/Clustered) === + // The same SPIR-V opcodes are used for base cl_khr_subgroups (Reduce/InclusiveScan/ExclusiveScan + // with add/min/max), cl_khr_subgroup_non_uniform_arithmetic (all ops with Reduce/Scan), + // and cl_khr_subgroup_clustered_reduce (ClusteredReduce). + + // clang-format off + // OpenCL subgroup functions are scalar-only; vectors are decomposed per-component + // via emit_subgroup_op_vec / emit_subgroup_op_vec_binary. 
+ +#define OPENCL_SUBGROUP_ARITH(spirv_op, base_name, nu_name) \ + case OpGroupNonUniform##spirv_op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_reduce_" base_name); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_subgroup_op_vec(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name); \ + else if (operation == GroupOperationClusteredReduce) \ + { \ + require_extension(needs_subgroup_clustered); \ + emit_subgroup_op_vec_binary(result_type, id, ops[4], ops[5], "sub_group_clustered_reduce_" base_name); \ + } \ + else \ + SPIRV_CROSS_THROW("Unsupported group operation."); \ + break; \ + } + +#define OPENCL_SUBGROUP_ARITH_CAST(spirv_op, base_name, nu_name, cast_type) \ + case OpGroupNonUniform##spirv_op: \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_reduce_" base_name, cast_type, cast_type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_inclusive_" base_name, cast_type, cast_type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "sub_group_scan_exclusive_" base_name, cast_type, cast_type); \ else if (operation == GroupOperationClusteredReduce) \ { \ require_extension(needs_subgroup_clustered); \ @@ -4325,21 +5769,36 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) if (execution_scope == ScopeSubgroup) { - if (!opencl_options.enable_subgroups) - SPIRV_CROSS_THROW("Subgroup barriers require enable_subgroups option."); - - // Subgroup barrier with memory fence flags - const uint32_t all_barriers = - MemorySemanticsWorkgroupMemoryMask 
| MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; - - if (semantics == 0 || (semantics & all_barriers) == all_barriers) + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) { - statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + // Emulated subgroup barrier: no-op for subgroup_size==1, + // otherwise use work_group_barrier (over-syncs but correct). + if (opencl_options.fixed_subgroup_size != 1) + { + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(CLK_LOCAL_MEM_FENCE);"); + else + statement("barrier(CLK_LOCAL_MEM_FENCE);"); + } } else { - string fence_flags = opencl_mem_fence_flags(semantics); - statement("sub_group_barrier(", fence_flags, ");"); + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup barriers require enable_subgroups option."); + + // Subgroup barrier with memory fence flags + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask; + + if (semantics == 0 || (semantics & all_barriers) == all_barriers) + { + statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + } + else + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("sub_group_barrier(", fence_flags, ");"); + } } } else @@ -4378,21 +5837,35 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) { if (memory_scope == ScopeSubgroup) { - if (!opencl_options.enable_subgroups) - SPIRV_CROSS_THROW("Subgroup memory barriers require enable_subgroups option."); - - const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | - MemorySemanticsImageMemoryMask; - - if ((semantics & all_barriers) == all_barriers || - (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))) + if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) { - 
statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + // Emulated: no-op for size==1, otherwise work_group_barrier + if (opencl_options.fixed_subgroup_size != 1) + { + if (opencl_options.supports_opencl_version(2, 0)) + statement("work_group_barrier(CLK_LOCAL_MEM_FENCE);"); + else + statement("barrier(CLK_LOCAL_MEM_FENCE);"); + } } else { - string fence_flags = opencl_mem_fence_flags(semantics); - statement("sub_group_barrier(", fence_flags, ");"); + if (!opencl_options.enable_subgroups) + SPIRV_CROSS_THROW("Subgroup memory barriers require enable_subgroups option."); + + const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if ((semantics & all_barriers) == all_barriers || + (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))) + { + statement("sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);"); + } + else + { + string fence_flags = opencl_mem_fence_flags(semantics); + statement("sub_group_barrier(", fence_flags, ");"); + } } } else diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index fc3962ddf..a5e6c4bee 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -57,6 +57,8 @@ class CompilerOpenCL : public CompilerGLSL bool emulate_subgroups = false; // Size of subgroup emulation uint32_t fixed_subgroup_size = 0; + // Maximum workgroup size (used for scratch buffer sizing when reqd_work_group_size is absent) + uint32_t max_workgroup_size = 256; void set_opencl_version(uint32_t major, uint32_t minor = 0, uint32_t patch = 0) { @@ -205,6 +207,20 @@ class CompilerOpenCL : public CompilerGLSL bool needs_subgroup_clustered = false; bool needs_subgroup_rotate = false; + // Subgroup emulation scratch buffer flags (set during emit, trigger force_recompile). 
+	bool needs_subgroup_emulation_scratch = false;
+	bool needs_subgroup_emulation_scratch64 = false;
+	// Set of function IDs that need subgroup emulation scratch parameters threaded.
+	std::unordered_set<uint32_t> funcs_using_subgroup_emulation;
+	// Helpers to emit subgroup emulation polyfills and scratch infrastructure.
+	void emit_subgroup_emulation_helpers();
+	void emit_subgroup_emulation_entry_point_vars();
+	uint32_t get_emulation_max_workgroup_size() const;
+	std::string get_emulation_subgroup_size_expr() const;
+	void emit_subgroup_op_emulated(const Instruction &i);
+	std::string subgroup_emulation_scratch_type(bool is_64bit) const;
+	void scan_subgroup_emulation_usage();
+
 	// Matrix type support: tracks which matrix signatures (basetype, vecsize, columns) are needed.
 	struct MatrixTypeKey
 	{
diff --git a/test_shaders.py b/test_shaders.py
index 309fefd10..fb9ec52c5 100755
--- a/test_shaders.py
+++ b/test_shaders.py
@@ -586,9 +586,9 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati
     return (spirv_path, hlsl_path)
 
 def path_to_opencl_standard_cli(shader):
-    # clang seems warn about cl_khr_subgroups unless is specified.
+    # clang seems to warn about cl_khr_subgroups unless 2.0 is specified.
     # Revisit when OpenCL 3.0 support is no longer experimental.
-    if '.subgroups.' in shader:
+    if '.subgroups.' in shader or '.subgroups-core.' in shader:
         return '200'
     # OpenCL 3.0 support in clang is experimental and 2.1 and 2.2 seem unsupported.
     if '.cl30.' in shader:
@@ -618,8 +618,10 @@ def validate_shader_opencl(shader, opt, paths):
     if '.fp64.' in shader:
         extensions.append('cl_khr_fp64')
     if '.subgroups-emulate.' in shader:
-        if '.subgroups.' in shader:
-            extensions.append('cl_khr_subgroups')
+        # Make sure no extensions are included
+        pass
+    elif '.subgroups-core.' in shader:
+        extensions.append('cl_khr_subgroups')
     elif '.subgroups.'
in shader: extensions.append('cl_khr_subgroups') extensions.append('cl_khr_subgroup_ballot') @@ -703,12 +705,14 @@ def cross_compile_opencl(shader, spirv, opt, iterations, paths): opencl_args.append('--opencl-fp16') if '.fp64.' in shader: opencl_args.append('--opencl-fp64') - if '.subgroups.' in shader: - opencl_args.append('--opencl-subgroups-all') if '.subgroups-emulate.' in shader: + if '.subgroups.' in shader: + opencl_args.append('--opencl-subgroups-all') opencl_args.append('--opencl-emulate-subgroups') opencl_args.append('--opencl-fixed-subgroup-size') opencl_args.append('32') + elif '.subgroups.' in shader: + opencl_args.append('--opencl-subgroups-all') if shader_is_invalid_spirv(shader): subprocess.run(opencl_args) From b9f85fb90f935271d7b3c7eccbf961517d8c0960 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Tue, 17 Mar 2026 10:17:15 -0700 Subject: [PATCH 14/16] OpenCL: Support for partial subgroups emulation. --- ...subgroups-basic.nocompat.vk.subgroups.comp | 76 +- ...subgroups-basic.nocompat.vk.subgroups.comp | 12 +- spirv_opencl.cpp | 889 +++++++++++++++++- spirv_opencl.hpp | 3 + test_shaders.py | 8 +- 5 files changed, 931 insertions(+), 57 deletions(-) diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp index ab81e408a..32cba0e4e 100644 --- a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp +++ b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp @@ -1,6 +1,7 @@ // Generated from SPIR-V by SPIRV-Cross (OpenCL backend) #pragma OPENCL EXTENSION cl_khr_subgroups : enable +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable struct SSBO { @@ -11,46 +12,55 @@ struct SSBO typedef struct SSBO SSBO; +float helper( float* val_1) +{ + float reduced_1 = sub_group_reduce_add((*val_1)); + bool elected_1 = sub_group_elect(); + return elected_1 ? 
reduced_1 : 0.0f; +} + __attribute__((reqd_work_group_size(256, 1, 1))) -__kernel void comp_main(__global SSBO* _11) +__kernel void comp_main(__global SSBO* _30) { uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); - _11->FragColor = convert_float(get_num_sub_groups()); - _11->FragColor = convert_float(get_sub_group_id()); - _11->FragColor = convert_float(get_sub_group_size()); - _11->FragColor = convert_float(get_sub_group_local_id()); + _30->FragColor = convert_float(get_num_sub_groups()); + _30->FragColor = convert_float(get_sub_group_id()); + _30->FragColor = convert_float(get_sub_group_size()); + _30->FragColor = convert_float(get_sub_group_local_id()); sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); sub_group_barrier(CLK_GLOBAL_MEM_FENCE); sub_group_barrier(CLK_LOCAL_MEM_FENCE); sub_group_barrier(CLK_GLOBAL_MEM_FENCE); - bool has_all = sub_group_all(true); - bool has_any = sub_group_any(true); - uint broadcasted = sub_group_broadcast(42u, 0u); - float fadd = sub_group_reduce_add(_11->FragColor); - int iadd = sub_group_reduce_add(_11->idat); - float fmin = sub_group_reduce_min(_11->FragColor); - float fmax = sub_group_reduce_max(_11->FragColor); - int smin = sub_group_reduce_min(_11->idat); - int smax = sub_group_reduce_max(_11->idat); - uint umin = sub_group_reduce_min(_11->udat); - uint umax = sub_group_reduce_max(_11->udat); - float finc_add = sub_group_scan_inclusive_add(_11->FragColor); - float finc_min = sub_group_scan_inclusive_min(_11->FragColor); - float finc_max = sub_group_scan_inclusive_max(_11->FragColor); - int iinc_add = sub_group_scan_inclusive_add(_11->idat); - int iinc_min = sub_group_scan_inclusive_min(_11->idat); - int iinc_max = sub_group_scan_inclusive_max(_11->idat); - uint uinc_min = sub_group_scan_inclusive_min(_11->udat); - uint uinc_max = sub_group_scan_inclusive_max(_11->udat); - float fexc_add = 
sub_group_scan_exclusive_add(_11->FragColor); - float fexc_min = sub_group_scan_exclusive_min(_11->FragColor); - float fexc_max = sub_group_scan_exclusive_max(_11->FragColor); - int iexc_add = sub_group_scan_exclusive_add(_11->idat); - int iexc_min = sub_group_scan_exclusive_min(_11->idat); - int iexc_max = sub_group_scan_exclusive_max(_11->idat); - uint uexc_min = sub_group_scan_exclusive_min(_11->udat); - uint uexc_max = sub_group_scan_exclusive_max(_11->udat); - _11->FragColor = (((((((((((fadd + fmin) + fmax) + finc_add) + finc_min) + finc_max) + fexc_add) + fexc_min) + fexc_max) + convert_float((((((((iadd + smin) + smax) + iinc_add) + iinc_min) + iinc_max) + iexc_add) + iexc_min) + iexc_max)) + convert_float((((((umin + umax) + uinc_min) + uinc_max) + uexc_min) + uexc_max) + broadcasted)) + (float)(has_all)) + (float)(has_any); + bool has_all_1 = sub_group_all(true); + bool has_any_1 = sub_group_any(true); + uint broadcasted_1 = sub_group_broadcast(42u, 0u); + float fadd_1 = sub_group_reduce_add(_30->FragColor); + int iadd_1 = sub_group_reduce_add(_30->idat); + float fmin_1 = sub_group_reduce_min(_30->FragColor); + float fmax_1 = sub_group_reduce_max(_30->FragColor); + int smin_1 = sub_group_reduce_min(_30->idat); + int smax_1 = sub_group_reduce_max(_30->idat); + uint umin_1 = sub_group_reduce_min(_30->udat); + uint umax_1 = sub_group_reduce_max(_30->udat); + float finc_add_1 = sub_group_scan_inclusive_add(_30->FragColor); + float finc_min_1 = sub_group_scan_inclusive_min(_30->FragColor); + float finc_max_1 = sub_group_scan_inclusive_max(_30->FragColor); + int iinc_add_1 = sub_group_scan_inclusive_add(_30->idat); + int iinc_min_1 = sub_group_scan_inclusive_min(_30->idat); + int iinc_max_1 = sub_group_scan_inclusive_max(_30->idat); + uint uinc_min_1 = sub_group_scan_inclusive_min(_30->udat); + uint uinc_max_1 = sub_group_scan_inclusive_max(_30->udat); + float fexc_add_1 = sub_group_scan_exclusive_add(_30->FragColor); + float fexc_min_1 = 
sub_group_scan_exclusive_min(_30->FragColor);
+    float fexc_max_1 = sub_group_scan_exclusive_max(_30->FragColor);
+    int iexc_add_1 = sub_group_scan_exclusive_add(_30->idat);
+    int iexc_min_1 = sub_group_scan_exclusive_min(_30->idat);
+    int iexc_max_1 = sub_group_scan_exclusive_max(_30->idat);
+    uint uexc_min_1 = sub_group_scan_exclusive_min(_30->udat);
+    uint uexc_max_1 = sub_group_scan_exclusive_max(_30->udat);
+    float param_1 = _30->FragColor;
+    float from_helper_1 = helper(&param_1);
+    _30->FragColor = ((((((((((((fadd_1 + fmin_1) + fmax_1) + finc_add_1) + finc_min_1) + finc_max_1) + fexc_add_1) + fexc_min_1) + fexc_max_1) + convert_float((((((((iadd_1 + smin_1) + smax_1) + iinc_add_1) + iinc_min_1) + iinc_max_1) + iexc_add_1) + iexc_min_1) + iexc_max_1)) + convert_float((((((umin_1 + umax_1) + uinc_min_1) + uinc_max_1) + uexc_min_1) + uexc_max_1) + broadcasted_1)) + (float)(has_all_1)) + (float)(has_any_1)) + from_helper_1;
 }
diff --git a/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp
index da981bccf..927d5e9f4 100644
--- a/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp
+++ b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.comp
@@ -12,6 +12,13 @@ layout(std430, binding = 0) buffer SSBO
     uint udat;
 };
 
+float helper(float val)
+{
+    float reduced = subgroupAdd(val);
+    bool elected = subgroupElect();
+    return elected ?
reduced : 0.0; +} + void main() { // Builtins @@ -64,9 +71,12 @@ void main() uint uexc_min = subgroupExclusiveMin(udat); uint uexc_max = subgroupExclusiveMax(udat); + // Call helper function that uses subgroup ops + float from_helper = helper(FragColor); + // Write results to prevent dead-code elimination FragColor = fadd + fmin + fmax + finc_add + finc_min + finc_max + fexc_add + fexc_min + fexc_max + float(iadd + smin + smax + iinc_add + iinc_min + iinc_max + iexc_add + iexc_min + iexc_max) + float(umin + umax + uinc_min + uinc_max + uexc_min + uexc_max + broadcasted) - + float(has_all) + float(has_any); + + float(has_all) + float(has_any) + from_helper; } diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index 6bddfa00f..b820f447c 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -181,20 +181,26 @@ void CompilerOpenCL::emit_header() statement("#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable"); if (opencl_options.enable_subgroups) statement("#pragma OPENCL EXTENSION cl_khr_subgroups : enable"); - if (needs_subgroup_vote) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable"); - if (needs_subgroup_ballot) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable"); - if (needs_subgroup_arithmetic) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable"); - if (needs_subgroup_shuffle) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable"); - if (needs_subgroup_shuffle_relative) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable"); - if (needs_subgroup_clustered) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable"); - if (needs_subgroup_rotate) - statement("#pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable"); + // In combined mode, extension-specific pragmas are emitted inside #ifdef blocks + // in the wrapper section, not here. 
+ bool combined_subgroup_mode = opencl_options.emulate_subgroups && opencl_options.enable_subgroups; + if (!combined_subgroup_mode) + { + if (needs_subgroup_vote) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable"); + if (needs_subgroup_ballot) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable"); + if (needs_subgroup_arithmetic) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable"); + if (needs_subgroup_shuffle) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable"); + if (needs_subgroup_shuffle_relative) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable"); + if (needs_subgroup_clustered) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable"); + if (needs_subgroup_rotate) + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable"); + } statement(""); // Emit FP_CONTRACT pragma based on ContractionOff execution mode and FPFastMathDefault. @@ -771,8 +777,11 @@ void CompilerOpenCL::emit_resources() statement(""); } - // Subgroup emulation helper functions. - emit_subgroup_emulation_helpers(); + // Subgroup emulation helper functions and combined-mode wrappers. + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + emit_subgroup_combined_wrappers(); + else + emit_subgroup_emulation_helpers(); // Default sampler for combined image+sampler usage (OpenCL C requires file-scope const sampler_t). if (needs_default_sampler) @@ -1003,7 +1012,9 @@ void CompilerOpenCL::emit_entry_point_declarations() } // Emit subgroup emulation local variables and scratch buffers. 
- if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + emit_subgroup_combined_entry_point_vars(); + else if (opencl_options.emulate_subgroups && !opencl_options.enable_subgroups) emit_subgroup_emulation_entry_point_vars(); // Materialize Input builtin variables as local variables. @@ -1132,6 +1143,20 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "spv_subgroup_eq_mask(_spv_lane_id)"; } + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_get_sub_group_eq_mask(_spv_lane_id)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1150,6 +1175,20 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "spv_subgroup_ge_mask(_spv_lane_id, _spv_subgroup_size)"; } + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_get_sub_group_ge_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1168,6 +1207,20 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "spv_subgroup_gt_mask(_spv_lane_id, _spv_subgroup_size)"; } + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + 
force_recompile(); + } + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_get_sub_group_gt_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1186,6 +1239,20 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "spv_subgroup_le_mask(_spv_lane_id, _spv_subgroup_size)"; } + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_get_sub_group_le_mask(_spv_lane_id, _spv_subgroup_size)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -1204,6 +1271,20 @@ string CompilerOpenCL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) } return "spv_subgroup_lt_mask(_spv_lane_id)"; } + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + if (!needs_subgroup_ballot) + { + needs_subgroup_ballot = true; + force_recompile(); + } + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + return "spv_get_sub_group_lt_mask(_spv_lane_id)"; + } if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup builtins require enable_subgroups option."); if (!needs_subgroup_ballot) @@ -3533,7 +3614,7 @@ void CompilerOpenCL::emit_subgroup_emulation_entry_point_vars() void CompilerOpenCL::scan_subgroup_emulation_usage() { - if (!opencl_options.emulate_subgroups || opencl_options.enable_subgroups) + if (!opencl_options.emulate_subgroups) return; funcs_using_subgroup_emulation.clear(); @@ -4179,6 +4260,739 @@ void 
CompilerOpenCL::emit_subgroup_emulation_helpers()
 	statement("");
 }
 
+void CompilerOpenCL::emit_subgroup_combined_entry_point_vars()
+{
+	// Combined mode: native cl_khr_subgroups builtins supply the geometry. Scratch slots are strided by get_max_sub_group_size() because get_sub_group_size() may be smaller in the final subgroup of a work-group.
+	statement("uint _spv_subgroup_size = get_sub_group_size();");
+	statement("uint _spv_lane_id = get_sub_group_local_id();");
+	statement("uint _spv_subgroup_id = get_sub_group_id();");
+	statement("uint _spv_subgroup_base = _spv_subgroup_id * get_max_sub_group_size();");
+	statement("uint _spv_linear_id = _spv_subgroup_base + _spv_lane_id;");
+
+	if (needs_subgroup_emulation_scratch)
+	{
+		uint32_t max_wg = get_emulation_max_workgroup_size();
+		statement("__local uint _spv_subgroup_scratch[", max_wg, "];");
+	}
+	if (needs_subgroup_emulation_scratch64)
+	{
+		uint32_t max_wg = get_emulation_max_workgroup_size();
+		statement("__local ulong _spv_subgroup_scratch64[", max_wg, "];");
+	}
+}
+
+void CompilerOpenCL::emit_subgroup_combined_wrappers()
+{
+	if (!opencl_options.emulate_subgroups || !opencl_options.enable_subgroups)
+		return;
+	if (!needs_subgroup_emulation_scratch)
+		return;
+
+	// Emit all emulation helpers unconditionally (unused static functions are DCE'd by compiler).
+	// These are the same helpers as pure emulation mode.
+	emit_subgroup_emulation_helpers();
+
+	// Now emit per-extension #ifdef/#else macro blocks.
+	// In the #ifdef path: macros map spv_* to native calls.
+	// In the #else path: macros map spv_* to the emulation helpers emitted above.
+ + // === cl_khr_subgroup_non_uniform_vote === + if (needs_subgroup_vote) + { + statement("#ifdef cl_khr_subgroup_non_uniform_vote"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable"); + statement("#define spv_sub_group_elect(lane_id) sub_group_elect()"); + statement("#define spv_all_equal_uint(scratch, val, linear_id, subgroup_base, subgroup_size) " + "sub_group_non_uniform_all_equal((val))"); + statement("#else"); + statement("#define spv_sub_group_elect(lane_id) ((lane_id) == 0u)"); + statement("#define spv_all_equal_uint(scratch, val, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_all_equal_uint((scratch), (val), (linear_id), (subgroup_base), (subgroup_size))"); + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_ballot === + if (needs_subgroup_ballot) + { + statement("#ifdef cl_khr_subgroup_ballot"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_ballot : enable"); + // Native macros + statement("#define spv_broadcast_first_uint(scratch, val, linear_id, subgroup_base) " + "sub_group_broadcast_first((val))"); + statement("#define spv_ballot(scratch, pred, linear_id, subgroup_base, subgroup_size) " + "sub_group_ballot((pred))"); + statement("#define spv_inverse_ballot(ballot, lane_id) " + "sub_group_inverse_ballot((ballot))"); + statement("#define spv_ballot_bit_extract(ballot, idx) " + "sub_group_ballot_bit_extract((ballot), (idx))"); + statement("#define spv_ballot_bit_count(ballot) " + "sub_group_ballot_bit_count((ballot))"); + statement("#define spv_ballot_inclusive_bit_count(ballot, lane_id) " + "sub_group_ballot_inclusive_scan((ballot))"); + statement("#define spv_ballot_exclusive_bit_count(ballot, lane_id) " + "sub_group_ballot_exclusive_scan((ballot))"); + statement("#define spv_ballot_find_lsb(ballot) sub_group_ballot_find_lsb((ballot))"); + statement("#define spv_ballot_find_msb(ballot) sub_group_ballot_find_msb((ballot))"); + // Mask builtins + statement("#define 
spv_get_sub_group_eq_mask(lane_id) get_sub_group_eq_mask()"); + statement("#define spv_get_sub_group_ge_mask(lane_id, sg_size) get_sub_group_ge_mask()"); + statement("#define spv_get_sub_group_gt_mask(lane_id, sg_size) get_sub_group_gt_mask()"); + statement("#define spv_get_sub_group_le_mask(lane_id, sg_size) get_sub_group_le_mask()"); + statement("#define spv_get_sub_group_lt_mask(lane_id) get_sub_group_lt_mask()"); + statement("#else"); + // Emulation macros + statement("#define spv_broadcast_first_uint(scratch, val, linear_id, subgroup_base) " + "spv_emulate_broadcast_first_uint((scratch), (val), (linear_id), (subgroup_base))"); + statement("#define spv_ballot(scratch, pred, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_ballot((scratch), (pred), (linear_id), (subgroup_base), (subgroup_size))"); + statement("#define spv_inverse_ballot(ballot, lane_id) " + "spv_emulate_inverse_ballot((ballot), (lane_id))"); + statement("#define spv_ballot_bit_extract(ballot, idx) " + "spv_emulate_ballot_bit_extract((ballot), (idx))"); + statement("#define spv_ballot_bit_count(ballot) " + "spv_emulate_ballot_bit_count((ballot))"); + statement("#define spv_ballot_inclusive_bit_count(ballot, lane_id) " + "spv_emulate_ballot_inclusive_bit_count((ballot), (lane_id))"); + statement("#define spv_ballot_exclusive_bit_count(ballot, lane_id) " + "spv_emulate_ballot_exclusive_bit_count((ballot), (lane_id))"); + statement("#define spv_ballot_find_lsb(ballot) spv_emulate_ballot_find_lsb((ballot))"); + statement("#define spv_ballot_find_msb(ballot) spv_emulate_ballot_find_msb((ballot))"); + statement("#define spv_get_sub_group_eq_mask(lane_id) spv_subgroup_eq_mask((lane_id))"); + statement("#define spv_get_sub_group_ge_mask(lane_id, sg_size) spv_subgroup_ge_mask((lane_id), (sg_size))"); + statement("#define spv_get_sub_group_gt_mask(lane_id, sg_size) spv_subgroup_gt_mask((lane_id), (sg_size))"); + statement("#define spv_get_sub_group_le_mask(lane_id, sg_size) 
spv_subgroup_le_mask((lane_id), (sg_size))"); + statement("#define spv_get_sub_group_lt_mask(lane_id) spv_subgroup_lt_mask((lane_id))"); + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_shuffle === + if (needs_subgroup_shuffle) + { + statement("#ifdef cl_khr_subgroup_shuffle"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle : enable"); + statement("#define spv_shuffle_uint(scratch, val, idx, linear_id, subgroup_base) " + "sub_group_shuffle((val), (idx))"); + statement("#define spv_shuffle_xor_uint(scratch, val, mask, lane_id, linear_id, subgroup_base) " + "sub_group_shuffle_xor((val), (mask))"); + statement("#else"); + statement("#define spv_shuffle_uint(scratch, val, idx, linear_id, subgroup_base) " + "spv_emulate_shuffle_uint((scratch), (val), (idx), (linear_id), (subgroup_base))"); + statement("#define spv_shuffle_xor_uint(scratch, val, mask, lane_id, linear_id, subgroup_base) " + "spv_emulate_shuffle_xor_uint((scratch), (val), (mask), (lane_id), (linear_id), (subgroup_base))"); + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_shuffle_relative === + if (needs_subgroup_shuffle_relative) + { + statement("#ifdef cl_khr_subgroup_shuffle_relative"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_shuffle_relative : enable"); + statement("#define spv_shuffle_up_uint(scratch, val, delta, lane_id, linear_id, subgroup_base) " + "sub_group_shuffle_up((val), (delta))"); + statement("#define spv_shuffle_down_uint(scratch, val, delta, lane_id, linear_id, subgroup_size) " + "sub_group_shuffle_down((val), (delta))"); + statement("#else"); + statement("#define spv_shuffle_up_uint(scratch, val, delta, lane_id, linear_id, subgroup_base) " + "spv_emulate_shuffle_up_uint((scratch), (val), (delta), (lane_id), (linear_id), (subgroup_base))"); + statement("#define spv_shuffle_down_uint(scratch, val, delta, lane_id, linear_id, subgroup_size) " + "spv_emulate_shuffle_down_uint((scratch), (val), (delta), (lane_id), 
(linear_id), (subgroup_size))"); + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_rotate === + if (needs_subgroup_rotate) + { + statement("#ifdef cl_khr_subgroup_rotate"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_rotate : enable"); + statement("#define spv_rotate_uint(scratch, val, delta, lane_id, linear_id, subgroup_base, subgroup_size) " + "sub_group_rotate((val), (delta))"); + statement("#define spv_clustered_rotate_uint(scratch, val, delta, lane_id, linear_id, subgroup_base, " + "cluster_size) sub_group_clustered_rotate((val), (delta), (cluster_size))"); + statement("#else"); + statement("#define spv_rotate_uint(scratch, val, delta, lane_id, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_rotate_uint((scratch), (val), (delta), (lane_id), (linear_id), (subgroup_base), " + "(subgroup_size))"); + statement("#define spv_clustered_rotate_uint(scratch, val, delta, lane_id, linear_id, subgroup_base, " + "cluster_size) spv_emulate_clustered_rotate_uint((scratch), (val), (delta), (lane_id), " + "(linear_id), (subgroup_base), (cluster_size))"); + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_non_uniform_arithmetic === + // Covers: mul, and, or, xor, logical_and/or/xor for Reduce/InclusiveScan/ExclusiveScan + if (needs_subgroup_arithmetic) + { + statement("#ifdef cl_khr_subgroup_non_uniform_arithmetic"); + statement("#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_arithmetic : enable"); + + // For each non-base arithmetic op, emit native macro. + // Native functions are generic (overloaded), so one macro per type-suffix works. 
+ auto emit_arith_macros_native = [&](const char *op_suffix, const char *native_reduce, + const char *native_inclusive, const char *native_exclusive) + { + statement("#define spv_reduce_", op_suffix, "(scratch, val, linear_id, subgroup_base, subgroup_size) ", + native_reduce, "((val))"); + statement("#define spv_inclusive_scan_", op_suffix, + "(scratch, val, lane_id, linear_id, subgroup_base, subgroup_size) ", native_inclusive, "((val))"); + statement("#define spv_exclusive_scan_", op_suffix, + "(scratch, val, lane_id, linear_id, subgroup_base, subgroup_size) ", native_exclusive, "((val))"); + }; + + emit_arith_macros_native("mul_uint", "sub_group_non_uniform_reduce_mul", + "sub_group_non_uniform_scan_inclusive_mul", + "sub_group_non_uniform_scan_exclusive_mul"); + emit_arith_macros_native("mul_int", "sub_group_non_uniform_reduce_mul", + "sub_group_non_uniform_scan_inclusive_mul", + "sub_group_non_uniform_scan_exclusive_mul"); + emit_arith_macros_native("mul_float", "sub_group_non_uniform_reduce_mul", + "sub_group_non_uniform_scan_inclusive_mul", + "sub_group_non_uniform_scan_exclusive_mul"); + emit_arith_macros_native("and_uint", "sub_group_non_uniform_reduce_and", + "sub_group_non_uniform_scan_inclusive_and", + "sub_group_non_uniform_scan_exclusive_and"); + emit_arith_macros_native("and_int", "sub_group_non_uniform_reduce_and", + "sub_group_non_uniform_scan_inclusive_and", + "sub_group_non_uniform_scan_exclusive_and"); + emit_arith_macros_native("or_uint", "sub_group_non_uniform_reduce_or", + "sub_group_non_uniform_scan_inclusive_or", "sub_group_non_uniform_scan_exclusive_or"); + emit_arith_macros_native("or_int", "sub_group_non_uniform_reduce_or", "sub_group_non_uniform_scan_inclusive_or", + "sub_group_non_uniform_scan_exclusive_or"); + emit_arith_macros_native("xor_uint", "sub_group_non_uniform_reduce_xor", + "sub_group_non_uniform_scan_inclusive_xor", + "sub_group_non_uniform_scan_exclusive_xor"); + emit_arith_macros_native("xor_int", 
"sub_group_non_uniform_reduce_xor", + "sub_group_non_uniform_scan_inclusive_xor", + "sub_group_non_uniform_scan_exclusive_xor"); + emit_arith_macros_native("logical_and", "sub_group_non_uniform_reduce_logical_and", + "sub_group_non_uniform_scan_inclusive_logical_and", + "sub_group_non_uniform_scan_exclusive_logical_and"); + emit_arith_macros_native("logical_or", "sub_group_non_uniform_reduce_logical_or", + "sub_group_non_uniform_scan_inclusive_logical_or", + "sub_group_non_uniform_scan_exclusive_logical_or"); + emit_arith_macros_native("logical_xor", "sub_group_non_uniform_reduce_logical_xor", + "sub_group_non_uniform_scan_inclusive_logical_xor", + "sub_group_non_uniform_scan_exclusive_logical_xor"); + + statement("#else"); + + auto emit_arith_macros_emulated = [&](const char *op_suffix) + { + statement("#define spv_reduce_", op_suffix, + "(scratch, val, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_reduce_", + op_suffix, "((scratch), (val), (linear_id), (subgroup_base), (subgroup_size))"); + statement("#define spv_inclusive_scan_", op_suffix, + "(scratch, val, lane_id, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_inclusive_scan_", + op_suffix, "((scratch), (val), (lane_id), (linear_id), (subgroup_base), (subgroup_size))"); + statement("#define spv_exclusive_scan_", op_suffix, + "(scratch, val, lane_id, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_exclusive_scan_", + op_suffix, "((scratch), (val), (lane_id), (linear_id), (subgroup_base), (subgroup_size))"); + }; + + for (const char *suffix : { "mul_uint", "mul_int", "mul_float", "and_uint", "and_int", "or_uint", "or_int", + "xor_uint", "xor_int", "logical_and", "logical_or", "logical_xor" }) + emit_arith_macros_emulated(suffix); + + statement("#endif"); + statement(""); + } + + // === cl_khr_subgroup_clustered_reduce === + // Covers ALL ops (including add/min/max) with ClusteredReduce + if (needs_subgroup_clustered) + { + statement("#ifdef cl_khr_subgroup_clustered_reduce"); + 
statement("#pragma OPENCL EXTENSION cl_khr_subgroup_clustered_reduce : enable"); + + auto emit_clustered_native = [&](const char *op_suffix, const char *native_func) + { + statement("#define spv_clustered_reduce_", op_suffix, + "(scratch, val, cluster, lane_id, linear_id, subgroup_base, subgroup_size) ", native_func, + "((val), (cluster))"); + }; + + emit_clustered_native("add_uint", "sub_group_clustered_reduce_add"); + emit_clustered_native("add_int", "sub_group_clustered_reduce_add"); + emit_clustered_native("add_float", "sub_group_clustered_reduce_add"); + emit_clustered_native("mul_uint", "sub_group_clustered_reduce_mul"); + emit_clustered_native("mul_int", "sub_group_clustered_reduce_mul"); + emit_clustered_native("mul_float", "sub_group_clustered_reduce_mul"); + emit_clustered_native("min_uint", "sub_group_clustered_reduce_min"); + emit_clustered_native("min_int", "sub_group_clustered_reduce_min"); + emit_clustered_native("min_float", "sub_group_clustered_reduce_min"); + emit_clustered_native("max_uint", "sub_group_clustered_reduce_max"); + emit_clustered_native("max_int", "sub_group_clustered_reduce_max"); + emit_clustered_native("max_float", "sub_group_clustered_reduce_max"); + emit_clustered_native("and_uint", "sub_group_clustered_reduce_and"); + emit_clustered_native("and_int", "sub_group_clustered_reduce_and"); + emit_clustered_native("or_uint", "sub_group_clustered_reduce_or"); + emit_clustered_native("or_int", "sub_group_clustered_reduce_or"); + emit_clustered_native("xor_uint", "sub_group_clustered_reduce_xor"); + emit_clustered_native("xor_int", "sub_group_clustered_reduce_xor"); + emit_clustered_native("logical_and", "sub_group_clustered_reduce_logical_and"); + emit_clustered_native("logical_or", "sub_group_clustered_reduce_logical_or"); + emit_clustered_native("logical_xor", "sub_group_clustered_reduce_logical_xor"); + + statement("#else"); + + auto emit_clustered_emulated = [&](const char *op_suffix) + { + statement("#define 
spv_clustered_reduce_", op_suffix, + "(scratch, val, cluster, lane_id, linear_id, subgroup_base, subgroup_size) " + "spv_emulate_clustered_reduce_", + op_suffix, + "((scratch), (val), (cluster), (lane_id), (linear_id), (subgroup_base), (subgroup_size))"); + }; + + for (const char *suffix : + { "add_uint", "add_int", "add_float", "mul_uint", "mul_int", "mul_float", "min_uint", + "min_int", "min_float", "max_uint", "max_int", "max_float", "and_uint", "and_int", + "or_uint", "or_int", "xor_uint", "xor_int", "logical_and", "logical_or", "logical_xor" }) + emit_clustered_emulated(suffix); + + statement("#endif"); + statement(""); + } +} + +void CompilerOpenCL::emit_subgroup_op_combined(const Instruction &i) +{ + // Combined mode: emit spv_* wrapper macro calls for non-base ops. + // Base ops (all, any, broadcast, add/min/max reduce/scan) are handled by the native path. + const uint32_t *ops = stream(i); + auto op = static_cast(i.op); + + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto require_extension = [this](bool &flag) + { + if (!flag) + { + flag = true; + force_recompile(); + } + }; + + auto require_scratch = [this]() + { + if (!needs_subgroup_emulation_scratch) + { + needs_subgroup_emulation_scratch = true; + force_recompile(); + } + }; + + // Helper for vector decomposition with spv_* macros (uint-based, like emulated path) + auto to_uint_cast = [&](uint32_t value_id) -> string + { + auto &type = expression_type(value_id); + if (type.basetype == SPIRType::UInt) + return to_expression(value_id); + else if (type.basetype == SPIRType::Int) + return join("as_uint(", to_expression(value_id), ")"); + else if (type.basetype == SPIRType::Float) + return join("as_uint(", to_expression(value_id), ")"); + else if (type.basetype == SPIRType::Boolean) + return join("(", to_expression(value_id), " ? 
1u : 0u)"); + return to_expression(value_id); + }; + + auto from_uint_cast = [&](const string &expr, uint32_t value_id) -> string + { + auto &type = expression_type(value_id); + if (type.basetype == SPIRType::Int) + return join("as_int(", expr, ")"); + else if (type.basetype == SPIRType::Float) + return join("as_float(", expr, ")"); + return expr; + }; + + // Vector decomposition calling a spv_* macro per component + auto emit_combined_vec = [&](uint32_t value_id, const string &prefix, const string &suffix) + { + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string expr = "(" + type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + string comp = join(to_enclosed_expression(value_id), ".", "xyzw"[c]); + string as_uint_comp; + if (type.basetype == SPIRType::UInt) + as_uint_comp = comp; + else if (type.basetype == SPIRType::Int) + as_uint_comp = join("as_uint(", comp, ")"); + else if (type.basetype == SPIRType::Float) + as_uint_comp = join("as_uint(", comp, ")"); + else + as_uint_comp = comp; + string result_comp = prefix + as_uint_comp + suffix; + if (type.basetype == SPIRType::Int) + result_comp = join("as_int(", result_comp, ")"); + else if (type.basetype == SPIRType::Float) + result_comp = join("as_float(", result_comp, ")"); + expr += result_comp; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + string result_expr = prefix + to_uint_cast(value_id) + suffix; + result_expr = from_uint_cast(result_expr, value_id); + emit_op(result_type, id, result_expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + }; + + switch (op) + { + // === Vote === + case OpGroupNonUniformElect: + require_extension(needs_subgroup_vote); + require_scratch(); + emit_op(result_type, id, "spv_sub_group_elect(_spv_lane_id)", true); + break; + + case 
OpGroupNonUniformAllEqual: + { + require_extension(needs_subgroup_vote); + require_scratch(); + auto &type = expression_type(ops[3]); + if (type.vecsize > 1) + { + string expr; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += " && "; + string comp = join(to_enclosed_expression(ops[3]), ".", "xyzw"[c]); + string as_uint_comp = (type.basetype == SPIRType::UInt) ? comp : join("as_uint(", comp, ")"); + expr += join("spv_all_equal_uint(_spv_subgroup_scratch, ", as_uint_comp, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + } + emit_op(result_type, id, expr, should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + else + { + emit_op(result_type, id, + join("spv_all_equal_uint(_spv_subgroup_scratch, ", to_uint_cast(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + } + break; + } + + // === Ballot === + case OpGroupNonUniformBroadcastFirst: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_combined_vec(ops[3], "spv_broadcast_first_uint(_spv_subgroup_scratch, ", + ", _spv_linear_id, _spv_subgroup_base)"); + break; + + case OpGroupNonUniformBallot: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_op(result_type, id, + join("spv_ballot(_spv_subgroup_scratch, ", to_expression(ops[3]), + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformInverseBallot: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_op(result_type, id, join("spv_inverse_ballot(", to_expression(ops[3]), ", _spv_lane_id)"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotBitExtract: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_op(result_type, id, + 
join("spv_ballot_bit_extract(", to_expression(ops[3]), ", ", to_expression(ops[4]), ")"), + should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotFindLSB: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_op(result_type, id, join("spv_ballot_find_lsb(", to_expression(ops[3]), ")"), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotFindMSB: + require_extension(needs_subgroup_ballot); + require_scratch(); + emit_op(result_type, id, join("spv_ballot_find_msb(", to_expression(ops[3]), ")"), should_forward(ops[3])); + inherit_expression_dependencies(id, ops[3]); + break; + + case OpGroupNonUniformBallotBitCount: + { + require_extension(needs_subgroup_ballot); + require_scratch(); + auto operation = static_cast(ops[3]); + if (operation == GroupOperationReduce) + emit_op(result_type, id, join("spv_ballot_bit_count(", to_expression(ops[4]), ")"), should_forward(ops[4])); + else if (operation == GroupOperationInclusiveScan) + emit_op(result_type, id, join("spv_ballot_inclusive_bit_count(", to_expression(ops[4]), ", _spv_lane_id)"), + should_forward(ops[4])); + else if (operation == GroupOperationExclusiveScan) + emit_op(result_type, id, join("spv_ballot_exclusive_bit_count(", to_expression(ops[4]), ", _spv_lane_id)"), + should_forward(ops[4])); + else + SPIRV_CROSS_THROW("Invalid group operation for BallotBitCount."); + inherit_expression_dependencies(id, ops[4]); + break; + } + + // === Shuffle === + case OpGroupNonUniformShuffle: + require_extension(needs_subgroup_shuffle); + require_scratch(); + emit_combined_vec(ops[3], "spv_shuffle_uint(_spv_subgroup_scratch, ", + join(", ", to_expression(ops[4]), ", _spv_linear_id, _spv_subgroup_base)")); + break; + + case OpGroupNonUniformShuffleXor: + require_extension(needs_subgroup_shuffle); + require_scratch(); + emit_combined_vec(ops[3], "spv_shuffle_xor_uint(_spv_subgroup_scratch, ", + 
join(", ", to_expression(ops[4]), ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base)")); + break; + + case OpGroupNonUniformShuffleUp: + require_extension(needs_subgroup_shuffle_relative); + require_scratch(); + emit_combined_vec(ops[3], "spv_shuffle_up_uint(_spv_subgroup_scratch, ", + join(", ", to_expression(ops[4]), ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base)")); + break; + + case OpGroupNonUniformShuffleDown: + require_extension(needs_subgroup_shuffle_relative); + require_scratch(); + emit_combined_vec(ops[3], "spv_shuffle_down_uint(_spv_subgroup_scratch, ", + join(", ", to_expression(ops[4]), ", _spv_lane_id, _spv_linear_id, _spv_subgroup_size)")); + break; + + // === Rotate === + case OpGroupNonUniformRotateKHR: + require_extension(needs_subgroup_rotate); + require_scratch(); + if (i.length > 5) + { + emit_combined_vec(ops[3], "spv_clustered_rotate_uint(_spv_subgroup_scratch, ", + join(", ", to_expression(ops[4]), ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, ", + to_expression(ops[5]), ")")); + } + else + { + emit_combined_vec(ops[3], "spv_rotate_uint(_spv_subgroup_scratch, ", + join(", ", to_expression(ops[4]), + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)")); + } + break; + + // === Arithmetic (non-base: mul, bitwise, logical + clustered reduce for all) === + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + { + require_scratch(); + auto operation = static_cast(ops[3]); + uint32_t value_id = ops[4]; + + // Determine the op suffix (matching emulation 
helper names) + const char *op_suffix = nullptr; + bool is_logical = false; + switch (op) + { + case OpGroupNonUniformFAdd: + op_suffix = "add_float"; + break; + case OpGroupNonUniformIAdd: + op_suffix = (expression_type(value_id).basetype == SPIRType::Int) ? "add_int" : "add_uint"; + break; + case OpGroupNonUniformFMul: + op_suffix = "mul_float"; + break; + case OpGroupNonUniformIMul: + op_suffix = (expression_type(value_id).basetype == SPIRType::Int) ? "mul_int" : "mul_uint"; + break; + case OpGroupNonUniformFMin: + op_suffix = "min_float"; + break; + case OpGroupNonUniformFMax: + op_suffix = "max_float"; + break; + case OpGroupNonUniformSMin: + op_suffix = "min_int"; + break; + case OpGroupNonUniformSMax: + op_suffix = "max_int"; + break; + case OpGroupNonUniformUMin: + op_suffix = "min_uint"; + break; + case OpGroupNonUniformUMax: + op_suffix = "max_uint"; + break; + case OpGroupNonUniformBitwiseAnd: + op_suffix = (expression_type(value_id).basetype == SPIRType::Int) ? "and_int" : "and_uint"; + break; + case OpGroupNonUniformBitwiseOr: + op_suffix = (expression_type(value_id).basetype == SPIRType::Int) ? "or_int" : "or_uint"; + break; + case OpGroupNonUniformBitwiseXor: + op_suffix = (expression_type(value_id).basetype == SPIRType::Int) ? 
"xor_int" : "xor_uint"; + break; + case OpGroupNonUniformLogicalAnd: + op_suffix = "logical_and"; + is_logical = true; + break; + case OpGroupNonUniformLogicalOr: + op_suffix = "logical_or"; + is_logical = true; + break; + case OpGroupNonUniformLogicalXor: + op_suffix = "logical_xor"; + is_logical = true; + break; + default: + break; + } + + // Determine group prefix and required extension + const char *group_prefix = nullptr; + switch (operation) + { + case GroupOperationReduce: + group_prefix = "spv_reduce_"; + require_extension(needs_subgroup_arithmetic); + break; + case GroupOperationInclusiveScan: + group_prefix = "spv_inclusive_scan_"; + require_extension(needs_subgroup_arithmetic); + break; + case GroupOperationExclusiveScan: + group_prefix = "spv_exclusive_scan_"; + require_extension(needs_subgroup_arithmetic); + break; + case GroupOperationClusteredReduce: + group_prefix = "spv_clustered_reduce_"; + require_extension(needs_subgroup_clustered); + break; + default: + SPIRV_CROSS_THROW("Unsupported group operation."); + } + + string macro_name = join(group_prefix, op_suffix); + + if (is_logical) + { + string val_expr = to_expression(value_id); + string expr; + if (operation == GroupOperationClusteredReduce) + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, ", ", to_expression(ops[5]), + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else if (operation == GroupOperationReduce) + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + auto &type = expression_type(value_id); + if (type.vecsize > 1) + { + auto &out_type = get(result_type); + string expr = "(" + 
type_to_glsl(out_type) + ")("; + for (uint32_t c = 0; c < type.vecsize; c++) + { + if (c > 0) + expr += ", "; + string comp = join(to_enclosed_expression(value_id), ".", "xyzw"[c]); + string call; + if (operation == GroupOperationClusteredReduce) + call = join(macro_name, "(_spv_subgroup_scratch, ", comp, ", ", to_expression(ops[5]), + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else if (operation == GroupOperationReduce) + call = join(macro_name, "(_spv_subgroup_scratch, ", comp, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else + call = join(macro_name, "(_spv_subgroup_scratch, ", comp, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + expr += call; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + else + { + string val_expr = to_expression(value_id); + string expr; + if (operation == GroupOperationClusteredReduce) + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, ", ", to_expression(ops[5]), + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else if (operation == GroupOperationReduce) + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + else + expr = join(macro_name, "(_spv_subgroup_scratch, ", val_expr, + ", _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size)"); + emit_op(result_type, id, expr, should_forward(value_id)); + inherit_expression_dependencies(id, value_id); + } + } + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported subgroup op for OpenCL combined mode."); + } +} + void CompilerOpenCL::emit_subgroup_op_emulated(const Instruction &i) { const uint32_t *ops = stream(i); @@ -4824,6 +5638,45 @@ void CompilerOpenCL::emit_subgroup_op(const Instruction &i) return; } + // Combined mode: non-base ops go through wrapper macros, base ops fall through to 
native. + if (opencl_options.emulate_subgroups && opencl_options.enable_subgroups) + { + // Base cl_khr_subgroups ops: All, Any, Broadcast, add/min/max Reduce/Scan + bool is_base = false; + switch (op) + { + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformBroadcast: + is_base = true; + break; + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + { + // Base only for Reduce/InclusiveScan/ExclusiveScan, not ClusteredReduce + auto operation = static_cast(ops[3]); + if (operation != GroupOperationClusteredReduce) + is_base = true; + break; + } + default: + break; + } + + if (!is_base) + { + emit_subgroup_op_combined(i); + return; + } + // Base ops fall through to native path below. + } + if (!opencl_options.enable_subgroups) SPIRV_CROSS_THROW("Subgroup operations require enable_subgroups option."); diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index a5e6c4bee..3dea7039a 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -215,6 +215,9 @@ class CompilerOpenCL : public CompilerGLSL // Helpers to emit subgroup emulation polyfills and scratch infrastructure. 
void emit_subgroup_emulation_helpers(); void emit_subgroup_emulation_entry_point_vars(); + void emit_subgroup_combined_entry_point_vars(); + void emit_subgroup_combined_wrappers(); + void emit_subgroup_op_combined(const Instruction &i); uint32_t get_emulation_max_workgroup_size() const; std::string get_emulation_subgroup_size_expr() const; void emit_subgroup_op_emulated(const Instruction &i); diff --git a/test_shaders.py b/test_shaders.py index fb9ec52c5..f7818b0a4 100755 --- a/test_shaders.py +++ b/test_shaders.py @@ -588,7 +588,7 @@ def cross_compile_hlsl(shader, spirv, opt, force_no_external_validation, iterati def path_to_opencl_standard_cli(shader): # clang seems warn about cl_khr_subgroups unless 2.0 is specified. # Revisit when OpenCL 3.0 support is no longer experimental. - if '.subgroups.' in shader or '.subgroups-core.' in shader: + if '.subgroups.' in shader: return '200' # OpenCL 3.0 support in clang is experimental and 2.1 and 2.2 seem unsupported. if '.cl30.' in shader: @@ -618,10 +618,8 @@ def validate_shader_opencl(shader, opt, paths): if '.fp64.' in shader: extensions.append('cl_khr_fp64') if '.subgroups-emulate.' in shader: - # Make sure no extensions are included - pass - elif '.subgroups-core.' in shader: - extensions.append('cl_khr_subgroups') + if '.subgroups.' in shader: + extensions.append('cl_khr_subgroups') elif '.subgroups.' 
in shader: extensions.append('cl_khr_subgroups') extensions.append('cl_khr_subgroup_ballot') From 218c1183623e985977d60b24713eca6a753930e0 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Wed, 18 Mar 2026 19:53:15 -0700 Subject: [PATCH 15/16] OpenCL: Subgroup emulation tests --- ...thmetic.nocompat.vk.subgroups-emulate.comp | 1105 ++++++++++++++++ ...-ballot.nocompat.vk.subgroups-emulate.comp | 1107 ++++++++++++++++ ...s-basic.nocompat.vk.subgroups-emulate.comp | 1132 +++++++++++++++++ ...compat.vk.subgroups.subgroups-emulate.comp | 73 ++ ...ustered.nocompat.vk.subgroups-emulate.comp | 1105 ++++++++++++++++ ...-rotate.nocompat.vk.subgroups-emulate.comp | 1094 ++++++++++++++++ ...shuffle.nocompat.vk.subgroups-emulate.comp | 1097 ++++++++++++++++ ...ps-vote.nocompat.vk.subgroups-emulate.comp | 1098 ++++++++++++++++ ...thmetic.nocompat.vk.subgroups-emulate.comp | 39 + ...-ballot.nocompat.vk.subgroups-emulate.comp | 43 + ...s-basic.nocompat.vk.subgroups-emulate.comp | 77 ++ ...compat.vk.subgroups.subgroups-emulate.comp | 77 ++ ...ustered.nocompat.vk.subgroups-emulate.comp | 34 + ...-rotate.nocompat.vk.subgroups-emulate.comp | 17 + ...shuffle.nocompat.vk.subgroups-emulate.comp | 22 + ...ps-vote.nocompat.vk.subgroups-emulate.comp | 27 + 16 files changed, 8147 insertions(+) create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp create mode 100644 
reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp create mode 100644 reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp create mode 100644 shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..7cdaa8e90 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1105 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r 
= scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? 
scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint 
lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint 
cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 
as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); 
+ return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { /* Returns the index (0..127) of the lowest set bit across ballot.x..ballot.w, or ~0u when no bit is set. x & (0u - x) isolates the lowest set bit of a word; clz() of that single bit is 31 - index, so the index is 31u - clz(), offset by 32 per word. */ + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _13) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + float fmul_1 = spv_emulate_reduce_mul_float(_spv_subgroup_scratch, _13->FragColor, _spv_linear_id, 
_spv_subgroup_base, _spv_subgroup_size); + int imul_1 = spv_emulate_reduce_mul_int(_spv_subgroup_scratch, _13->idat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint umul_1 = spv_emulate_reduce_mul_uint(_spv_subgroup_scratch, _13->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint band_1 = spv_emulate_reduce_and_uint(_spv_subgroup_scratch, _13->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint bor_1 = spv_emulate_reduce_or_uint(_spv_subgroup_scratch, _13->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint bxor_1 = spv_emulate_reduce_xor_uint(_spv_subgroup_scratch, _13->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool land_1 = spv_emulate_reduce_logical_and(_spv_subgroup_scratch, _13->udat > 0u, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool lor_1 = spv_emulate_reduce_logical_or(_spv_subgroup_scratch, _13->udat > 0u, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool lxor_1 = spv_emulate_reduce_logical_xor(_spv_subgroup_scratch, _13->udat > 0u, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float fmul_inc_1 = spv_emulate_inclusive_scan_mul_float(_spv_subgroup_scratch, _13->FragColor, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float fmul_exc_1 = spv_emulate_exclusive_scan_mul_float(_spv_subgroup_scratch, _13->FragColor, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + _13->FragColor = ((((((fmul_1 + fmul_inc_1) + fmul_exc_1) + convert_float(imul_1 + as_int(umul_1))) + convert_float((band_1 + bor_1) + bxor_1)) + (float)(land_1)) + (float)(lor_1)) + (float)(lxor_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..46c143c9a --- /dev/null +++ 
b/reference/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1107 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? 
scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint 
lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint 
cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 
as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); 
+ return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u);
+    if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; }
+    else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; }
+    else if (word == 2u) { masked.z &= mask; masked.w = 0u; }
+    else { masked.w &= mask; }
+    return spv_popcount4(masked);
+}
+
+// Number of set ballot bits strictly below lane_id.
+// Lane 0 has nothing below it; every other lane reuses the inclusive count of lane_id - 1.
+static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) {
+    if (lane_id == 0u) return 0u;
+    return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u);
+}
+
+// Index of the lowest set bit in the 128-bit ballot, or ~0u when no bit is set.
+// Components are scanned x, y, z, w, i.e. bits 0-31, 32-63, 64-95, 96-127.
+// `v & (0u - v)` isolates the lowest set bit (1u << k). clz() of that single bit is 31 - k,
+// so it has to be subtracted from the index of the word's top bit to recover k
+// (the same form spv_emulate_ballot_find_msb uses). Returning clz() directly would give
+// 31 - k, e.g. 31 instead of 0 for a ballot of 0x0000FFFF.
+static uint spv_emulate_ballot_find_lsb(uint4 ballot) {
+    if (ballot.x != 0u) return 31u - (uint)clz(ballot.x & (0u - ballot.x));
+    if (ballot.y != 0u) return 63u - (uint)clz(ballot.y & (0u - ballot.y));
+    if (ballot.z != 0u) return 95u - (uint)clz(ballot.z & (0u - ballot.z));
+    if (ballot.w != 0u) return 127u - (uint)clz(ballot.w & (0u - ballot.w));
+    return ~0u;
+}
+
+// Index of the highest set bit in the 128-bit ballot, or ~0u when no bit is set.
+// Components are scanned w, z, y, x so the first non-zero word holds the answer;
+// clz() counts the zero bits above it within that word.
+static uint spv_emulate_ballot_find_msb(uint4 ballot) {
+    if (ballot.w != 0u) return 127u - (uint)clz(ballot.w);
+    if (ballot.z != 0u) return 95u - (uint)clz(ballot.z);
+    if (ballot.y != 0u) return 63u - (uint)clz(ballot.y);
+    if (ballot.x != 0u) return 31u - (uint)clz(ballot.x);
+    return ~0u;
+}
+
+__attribute__((reqd_work_group_size(256, 1, 1)))
+__kernel void comp_main(__global SSBO* _23)
+{
+    uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2));
+    uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2);
+    uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0);
+    uint _spv_subgroup_size = 32u;
+    uint _spv_lane_id = _spv_linear_id % 32u;
+    uint _spv_subgroup_id = _spv_linear_id / 32u;
+    uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u;
+    uint _spv_subgroup_base = _spv_subgroup_id * 32u;
+    __local uint _spv_subgroup_scratch[256];
+    uint4 ballot_1 = spv_emulate_ballot(_spv_subgroup_scratch, _spv_lane_id < 16u, _spv_linear_id,
_spv_subgroup_base, _spv_subgroup_size); + float first_1 = as_float(spv_emulate_broadcast_first_uint(_spv_subgroup_scratch, as_uint(_23->FragColor), _spv_linear_id, _spv_subgroup_base)); + bool extracted_1 = spv_emulate_ballot_bit_extract(ballot_1, 5u); + uint bit_count_1 = spv_emulate_ballot_bit_count(ballot_1); + uint inclusive_count_1 = spv_emulate_ballot_inclusive_bit_count(ballot_1, _spv_lane_id); + uint exclusive_count_1 = spv_emulate_ballot_exclusive_bit_count(ballot_1, _spv_lane_id); + uint find_lsb_1 = spv_emulate_ballot_find_lsb(ballot_1); + uint find_msb_1 = spv_emulate_ballot_find_msb(ballot_1); + bool inv_ballot_1 = spv_emulate_inverse_ballot(ballot_1, _spv_lane_id); + uint4 eq_mask_1 = spv_subgroup_eq_mask(_spv_lane_id); + uint4 ge_mask_1 = spv_subgroup_ge_mask(_spv_lane_id, _spv_subgroup_size); + uint4 gt_mask_1 = spv_subgroup_gt_mask(_spv_lane_id, _spv_subgroup_size); + uint4 le_mask_1 = spv_subgroup_le_mask(_spv_lane_id, _spv_subgroup_size); + uint4 lt_mask_1 = spv_subgroup_lt_mask(_spv_lane_id); + _23->FragColor = ((((((((first_1 + convert_float(((ballot_1.x + ballot_1.y) + ballot_1.z) + ballot_1.w)) + convert_float((((bit_count_1 + inclusive_count_1) + exclusive_count_1) + find_lsb_1) + find_msb_1)) + (float)(extracted_1)) + (float)(inv_ballot_1)) + convert_float(eq_mask_1.x)) + convert_float(ge_mask_1.x)) + convert_float(gt_mask_1.x)) + convert_float(le_mask_1.x)) + convert_float(lt_mask_1.x); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..48c743edf --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1132 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint 
spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? 
scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint 
lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint 
cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 
as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); 
+ return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { + if (ballot.x != 0u) return (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 32u + (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 64u + (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 96u + (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +float helper( float* val_1, __local uint* _spv_subgroup_scratch, uint _spv_linear_id, uint _spv_subgroup_base, uint _spv_subgroup_size, uint _spv_lane_id) +{ + float reduced_1 = spv_emulate_reduce_add_float(_spv_subgroup_scratch, (*val_1), _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool elected_2 = (_spv_lane_id == 0u); + return elected_2 ? 
reduced_1 : 0.0f; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _30) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + _30->FragColor = convert_float(_spv_num_subgroups); + _30->FragColor = convert_float(_spv_subgroup_id); + _30->FragColor = convert_float(_spv_subgroup_size); + _30->FragColor = convert_float(_spv_lane_id); + bool elected_1_1 = (_spv_lane_id == 0u); + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + barrier(CLK_LOCAL_MEM_FENCE); + bool has_all_1 = spv_emulate_all(_spv_subgroup_scratch, true, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool has_any_1 = spv_emulate_any(_spv_subgroup_scratch, true, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint broadcasted_1 = spv_emulate_broadcast_uint(_spv_subgroup_scratch, 42u, 0u, _spv_linear_id, _spv_subgroup_base); + float fadd_1 = spv_emulate_reduce_add_float(_spv_subgroup_scratch, _30->FragColor, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int iadd_1 = spv_emulate_reduce_add_int(_spv_subgroup_scratch, _30->idat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float fmin_1 = spv_emulate_reduce_min_float(_spv_subgroup_scratch, _30->FragColor, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float fmax_1 = spv_emulate_reduce_max_float(_spv_subgroup_scratch, 
_30->FragColor, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int smin_1 = spv_emulate_reduce_min_int(_spv_subgroup_scratch, _30->idat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int smax_1 = spv_emulate_reduce_max_int(_spv_subgroup_scratch, _30->idat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint umin_1 = spv_emulate_reduce_min_uint(_spv_subgroup_scratch, _30->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint umax_1 = spv_emulate_reduce_max_uint(_spv_subgroup_scratch, _30->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float finc_add_1 = spv_emulate_inclusive_scan_add_float(_spv_subgroup_scratch, _30->FragColor, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int iinc_add_1 = spv_emulate_inclusive_scan_add_int(_spv_subgroup_scratch, _30->idat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint uinc_min_1 = spv_emulate_inclusive_scan_min_uint(_spv_subgroup_scratch, _30->udat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint uinc_max_1 = spv_emulate_inclusive_scan_max_uint(_spv_subgroup_scratch, _30->udat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float fexc_add_1 = spv_emulate_exclusive_scan_add_float(_spv_subgroup_scratch, _30->FragColor, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int iexc_add_1 = spv_emulate_exclusive_scan_add_int(_spv_subgroup_scratch, _30->idat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint uexc_min_1 = spv_emulate_exclusive_scan_min_uint(_spv_subgroup_scratch, _30->udat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint uexc_max_1 = spv_emulate_exclusive_scan_max_uint(_spv_subgroup_scratch, _30->udat, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float param_1 = _30->FragColor; + float from_helper_1 = helper(¶m_1, 
_spv_subgroup_scratch, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size, _spv_lane_id); + _30->FragColor = (((((((((fadd_1 + fmin_1) + fmax_1) + finc_add_1) + fexc_add_1) + convert_float((((iadd_1 + smin_1) + smax_1) + iinc_add_1) + iexc_add_1)) + convert_float((((((umin_1 + umax_1) + uinc_min_1) + uinc_max_1) + uexc_min_1) + uexc_max_1) + broadcasted_1)) + (float)(has_all_1)) + (float)(has_any_1)) + (float)(elected_1_1)) + from_helper_1; +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp new file mode 100644 index 000000000..27a775a32 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp @@ -0,0 +1,73 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + +#pragma OPENCL EXTENSION cl_khr_subgroups : enable + +struct SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +#ifdef cl_khr_subgroup_non_uniform_vote +#pragma OPENCL EXTENSION cl_khr_subgroup_non_uniform_vote : enable +#define spv_sub_group_elect(lane_id) sub_group_elect() +#define spv_all_equal_uint(scratch, val, linear_id, subgroup_base, subgroup_size) sub_group_non_uniform_all_equal((val)) +#else +#define spv_sub_group_elect(lane_id) ((lane_id) == 0u) +#define spv_all_equal_uint(scratch, val, linear_id, subgroup_base, subgroup_size) spv_emulate_all_equal_uint((scratch), (val), (linear_id), (subgroup_base), (subgroup_size)) +#endif + +float helper( float* val_1, __local uint* _spv_subgroup_scratch, uint _spv_linear_id, uint _spv_subgroup_base, uint _spv_subgroup_size, uint _spv_lane_id) +{ + float reduced_1 = sub_group_reduce_add((*val_1)); + bool elected_2 = spv_sub_group_elect(_spv_lane_id); + return elected_2 ? 
reduced_1 : 0.0f; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _30) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_subgroup_size = get_sub_group_size(); + uint _spv_lane_id = get_sub_group_local_id(); + uint _spv_subgroup_id = get_sub_group_id(); + uint _spv_linear_id = _spv_subgroup_id * _spv_subgroup_size + _spv_lane_id; + uint _spv_subgroup_base = _spv_subgroup_id * _spv_subgroup_size; + __local uint _spv_subgroup_scratch[256]; + _30->FragColor = convert_float(get_num_sub_groups()); + _30->FragColor = convert_float(get_sub_group_id()); + _30->FragColor = convert_float(get_sub_group_size()); + _30->FragColor = convert_float(get_sub_group_local_id()); + bool elected_1_1 = spv_sub_group_elect(_spv_lane_id); + sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + bool has_all_1 = sub_group_all(true); + bool has_any_1 = sub_group_any(true); + uint broadcasted_1 = sub_group_broadcast(42u, 0u); + float fadd_1 = sub_group_reduce_add(_30->FragColor); + int iadd_1 = sub_group_reduce_add(_30->idat); + float fmin_1 = sub_group_reduce_min(_30->FragColor); + float fmax_1 = sub_group_reduce_max(_30->FragColor); + int smin_1 = sub_group_reduce_min(_30->idat); + int smax_1 = sub_group_reduce_max(_30->idat); + uint umin_1 = sub_group_reduce_min(_30->udat); + uint umax_1 = sub_group_reduce_max(_30->udat); + float finc_add_1 = sub_group_scan_inclusive_add(_30->FragColor); + int iinc_add_1 = sub_group_scan_inclusive_add(_30->idat); + uint uinc_min_1 = sub_group_scan_inclusive_min(_30->udat); + uint uinc_max_1 = sub_group_scan_inclusive_max(_30->udat); + float fexc_add_1 = sub_group_scan_exclusive_add(_30->FragColor); + int iexc_add_1 = 
sub_group_scan_exclusive_add(_30->idat); + uint uexc_min_1 = sub_group_scan_exclusive_min(_30->udat); + uint uexc_max_1 = sub_group_scan_exclusive_max(_30->udat); + float param_1 = _30->FragColor; + float from_helper_1 = helper(¶m_1, _spv_subgroup_scratch, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size, _spv_lane_id); + _30->FragColor = (((((((((fadd_1 + fmin_1) + fmax_1) + finc_add_1) + fexc_add_1) + convert_float((((iadd_1 + smin_1) + smax_1) + iinc_add_1) + iexc_add_1)) + convert_float((((((umin_1 + umax_1) + uinc_min_1) + uinc_max_1) + uexc_min_1) + uexc_max_1) + broadcasted_1)) + (float)(has_all_1)) + (float)(has_any_1)) + (float)(elected_1_1)) + from_helper_1; +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..4675efa97 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1105 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + 
scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; 
+ barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, 
uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint 
lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * 
as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); 
+ int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); 
+ barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); 
+ float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { + if (ballot.x != 0u) return (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 32u + (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 64u + (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 96u + (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _13) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + float cred_add_1 = spv_emulate_clustered_reduce_add_float(_spv_subgroup_scratch, _13->FragColor, 4u, 
_spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float cred_mul_1 = spv_emulate_clustered_reduce_mul_float(_spv_subgroup_scratch, _13->FragColor, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float cred_min_1 = spv_emulate_clustered_reduce_min_float(_spv_subgroup_scratch, _13->FragColor, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + float cred_max_1 = spv_emulate_clustered_reduce_max_float(_spv_subgroup_scratch, _13->FragColor, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + int cred_iadd_1 = spv_emulate_clustered_reduce_add_int(_spv_subgroup_scratch, _13->idat, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint cred_umin_1 = spv_emulate_clustered_reduce_min_uint(_spv_subgroup_scratch, _13->udat, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint cred_and_1 = spv_emulate_clustered_reduce_and_uint(_spv_subgroup_scratch, _13->udat, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint cred_or_1 = spv_emulate_clustered_reduce_or_uint(_spv_subgroup_scratch, _13->udat, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + uint cred_xor_1 = spv_emulate_clustered_reduce_xor_uint(_spv_subgroup_scratch, _13->udat, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool cred_land_1 = spv_emulate_clustered_reduce_logical_and(_spv_subgroup_scratch, _13->udat > 0u, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool cred_lor_1 = spv_emulate_clustered_reduce_logical_or(_spv_subgroup_scratch, _13->udat > 0u, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + _13->FragColor = (((((((cred_add_1 + cred_mul_1) + cred_min_1) + cred_max_1) + convert_float(cred_iadd_1)) + convert_float(cred_umin_1)) + convert_float((cred_and_1 + cred_or_1) + cred_xor_1)) + (float)(cred_land_1)) + 
(float)(cred_lor_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..784532d51 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1094 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? 
scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint 
lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint 
cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 
as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); 
+ return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { /* Returns the index (0..127) of the lowest set ballot bit, or ~0u when the ballot is empty. x & (0u - x) isolates the lowest set bit; clz of that single bit is 31 - index, so the index within a word is 31 - clz, offset by 32 per word. */ + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global float* _19) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + uint rotated_1 = spv_emulate_rotate_uint(_spv_subgroup_scratch, 20u, 4u, _spv_lane_id, _spv_linear_id, 
_spv_subgroup_base, _spv_subgroup_size); + uint rotated_clustered_1 = spv_emulate_clustered_rotate_uint(_spv_subgroup_scratch, 20u, 4u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base, 8u); + _19[0] = convert_float(rotated_1) + convert_float(rotated_clustered_1); +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..a9a397230 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1097 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint 
lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint 
lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint 
cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 
as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); 
+ return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { /* Returns the index (0..127) of the lowest set bit across ballot.x..w, or ~0u if the ballot is empty. (v & (0u - v)) isolates the lowest set bit; clz() of that single bit is (31 - index), so it is subtracted from the word's top bit position to obtain the index. */ + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _12) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + float shuffled_1 = as_float(spv_emulate_shuffle_uint(_spv_subgroup_scratch, as_uint(_12->FragColor), 3u, 
_spv_linear_id, _spv_subgroup_base)); + float xored_1 = as_float(spv_emulate_shuffle_xor_uint(_spv_subgroup_scratch, as_uint(_12->FragColor), 1u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base)); + float up_1 = as_float(spv_emulate_shuffle_up_uint(_spv_subgroup_scratch, as_uint(_12->FragColor), 1u, _spv_lane_id, _spv_linear_id, _spv_subgroup_base)); + float down_1 = as_float(spv_emulate_shuffle_down_uint(_spv_subgroup_scratch, as_uint(_12->FragColor), 1u, _spv_lane_id, _spv_linear_id, _spv_subgroup_size)); + _12->FragColor = ((shuffled_1 + xored_1) + up_1) + down_1; +} + diff --git a/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..2f3475ed6 --- /dev/null +++ b/reference/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,1098 @@ +// Generated from SPIR-V by SPIRV-Cross (OpenCL backend) + + +struct SSBO +{ + float FragColor; + uint udat; +}; + +typedef struct SSBO SSBO; + +static uint spv_emulate_broadcast_uint(__local uint* scratch, uint val, uint src_lane, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + src_lane]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_broadcast_first_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_uint(__local uint* scratch, uint val, uint index, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + index]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_xor_uint(__local uint* scratch, uint val, uint mask, uint 
lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + (lane_id ^ mask)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_up_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id >= delta) ? scratch[linear_id - delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_shuffle_down_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = (lane_id + delta < subgroup_size) ? scratch[linear_id + delta] : val; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base + ((lane_id + delta) % subgroup_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_rotate_uint(__local uint* scratch, uint val, uint delta, uint lane_id, uint linear_id, uint subgroup_base, uint cluster_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base + ((lane_id - cluster_base + delta) % cluster_size)]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_any(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_all_equal_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint first = scratch[subgroup_base]; + bool r = true; + for (uint i = 1u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] == first); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_emulate_ballot(__local uint* scratch, bool predicate, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = predicate ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint4 r = (uint4)(0u); + for (uint i = 0u; i < subgroup_size; i++) { + if (scratch[subgroup_base + i] != 0u) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + } + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint4 spv_subgroup_eq_mask(uint lane_id) { + uint4 r = (uint4)(0u); + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + if (word == 0u) r.x = (1u << bit); + else if (word == 1u) r.y = (1u << bit); + else if (word == 2u) r.z = (1u << bit); + else r.w = (1u << bit); + return r; +} + +static uint4 spv_subgroup_ge_mask(uint lane_id, uint subgroup_size) { + uint4 r = (uint4)(0u); + for (uint i = lane_id; i < subgroup_size; i++) { + uint word = i / 32u; + uint bit = i % 32u; + if (word == 0u) r.x |= (1u << bit); + else if (word == 1u) r.y |= (1u << bit); + else if (word == 2u) r.z |= (1u << bit); + else r.w |= (1u << bit); + } + return r; +} + +static uint4 spv_subgroup_gt_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(lane_id + 1u, subgroup_size); +} + +static uint4 spv_subgroup_le_mask(uint lane_id, uint subgroup_size) { + return spv_subgroup_ge_mask(0u, lane_id + 1u); +} + +static uint4 spv_subgroup_lt_mask(uint lane_id) { + if (lane_id == 0u) return (uint4)(0u); + return spv_subgroup_ge_mask(0u, lane_id); +} + +static uint spv_emulate_reduce_add_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + 
scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_add_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r + scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_add_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r + scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_mul_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_mul_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; 
+ barrier(CLK_LOCAL_MEM_FENCE); + uint r = 1u; + for (uint i = 0u; i < lane_id; i++) + r = r * scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_mul_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r * scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_min_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_min_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = UINT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_min_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = min(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_max_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_max_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = max(r, scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_max_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = max(r, scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_and_uint(__local uint* scratch, uint val, 
uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_and_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0xFFFFFFFFu; + for (uint i = 0u; i < lane_id; i++) + r = r & scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_and_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r & scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_or_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint 
linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_or_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r | scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_or_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r | scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_reduce_xor_uint(__local uint* scratch, uint val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_inclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = scratch[subgroup_base]; + for (uint i = 1u; i <= lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_exclusive_scan_xor_uint(__local uint* scratch, uint val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint r = 0u; + for (uint i = 0u; i < lane_id; i++) + r = r ^ scratch[subgroup_base + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static uint spv_emulate_clustered_reduce_xor_uint(__local uint* scratch, uint val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + uint r = scratch[subgroup_base + cluster_base_in_sg]; + for (uint i = 1u; i < cluster_size; i++) + r = r ^ scratch[subgroup_base + cluster_base_in_sg + i]; + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_add_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_add_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r + as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_add_int(__local uint* scratch, int val, uint cluster_size, uint 
lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_mul_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_mul_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 1; + for (uint i = 0u; i < lane_id; i++) + r = r * as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_mul_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * 
as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_min_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_min_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MAX; + for (uint i = 0u; i < lane_id; i++) + r = min(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_min_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = min(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_max_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); 
+ int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_max_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = INT_MIN; + for (uint i = 0u; i < lane_id; i++) + r = max(r, as_int(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_max_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = max(r, as_int(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_and_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint 
subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_and_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(0xFFFFFFFFu); + for (uint i = 0u; i < lane_id; i++) + r = r & as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_and_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r & as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_or_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int 
spv_emulate_exclusive_scan_or_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r | as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_or_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r | as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_reduce_xor_int(__local uint* scratch, int val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_inclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = as_int(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_exclusive_scan_xor_int(__local uint* scratch, int val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + int r = 0; + for (uint i = 0u; i < lane_id; i++) + r = r ^ as_int(scratch[subgroup_base + i]); + 
barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static int spv_emulate_clustered_reduce_xor_int(__local uint* scratch, int val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + int r = as_int(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r ^ as_int(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_add_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_add_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 0.0f; + for (uint i = 0u; i < lane_id; i++) + r = r + as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_add_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + 
barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r + as_float(scratch[subgroup_base + cluster_base_in_sg + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_mul_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_mul_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = 1.0f; + for (uint i = 0u; i < lane_id; i++) + r = r * as_float(scratch[subgroup_base + i]); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_mul_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = r * as_float(scratch[subgroup_base + cluster_base_in_sg + i]); 
+ barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_min_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_min_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmin(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_min_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmin(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_reduce_max_float(__local uint* scratch, float val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); 
+ float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i < subgroup_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_inclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = as_float(scratch[subgroup_base]); + for (uint i = 1u; i <= lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_exclusive_scan_max_float(__local uint* scratch, float val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + float r = -INFINITY; + for (uint i = 0u; i < lane_id; i++) + r = fmax(r, as_float(scratch[subgroup_base + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static float spv_emulate_clustered_reduce_max_float(__local uint* scratch, float val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = as_uint(val); + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + float r = as_float(scratch[subgroup_base + cluster_base_in_sg]); + for (uint i = 1u; i < cluster_size; i++) + r = fmax(r, as_float(scratch[subgroup_base + cluster_base_in_sg + i])); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_and(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < subgroup_size; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i <= lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_and(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = true; + for (uint i = 0u; i < lane_id; i++) + r = r && (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_and(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = true; + for (uint i = 0u; i < cluster_size; i++) + r = r && (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_or(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_or(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r || (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_or(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r || (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_reduce_logical_xor(__local uint* scratch, bool val, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < subgroup_size; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i <= lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_exclusive_scan_logical_xor(__local uint* scratch, bool val, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 
1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + bool r = false; + for (uint i = 0u; i < lane_id; i++) + r = r != (scratch[subgroup_base + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_clustered_reduce_logical_xor(__local uint* scratch, bool val, uint cluster_size, uint lane_id, uint linear_id, uint subgroup_base, uint subgroup_size) { + scratch[linear_id] = val ? 1u : 0u; + barrier(CLK_LOCAL_MEM_FENCE); + uint cluster_base_in_sg = (lane_id / cluster_size) * cluster_size; + bool r = false; + for (uint i = 0u; i < cluster_size; i++) + r = r != (scratch[subgroup_base + cluster_base_in_sg + i] != 0u); + barrier(CLK_LOCAL_MEM_FENCE); + return r; +} + +static bool spv_emulate_inverse_ballot(uint4 ballot, uint lane_id) { + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static bool spv_emulate_ballot_bit_extract(uint4 ballot, uint index) { + uint word = index / 32u; + uint bit = index % 32u; + uint v = (word == 0u) ? ballot.x : (word == 1u) ? ballot.y : (word == 2u) ? ballot.z : ballot.w; + return (v & (1u << bit)) != 0u; +} + +static uint spv_popcount4(uint4 v) { + return popcount(v.x) + popcount(v.y) + popcount(v.z) + popcount(v.w); +} + +static uint spv_emulate_ballot_bit_count(uint4 ballot) { + return spv_popcount4(ballot); +} + +static uint spv_emulate_ballot_inclusive_bit_count(uint4 ballot, uint lane_id) { + uint4 masked = ballot; + uint word = lane_id / 32u; + uint bit = lane_id % 32u; + uint mask = (bit == 31u) ? 
0xFFFFFFFFu : ((1u << (bit + 1u)) - 1u); + if (word == 0u) { masked.x &= mask; masked.y = 0u; masked.z = 0u; masked.w = 0u; } + else if (word == 1u) { masked.y &= mask; masked.z = 0u; masked.w = 0u; } + else if (word == 2u) { masked.z &= mask; masked.w = 0u; } + else { masked.w &= mask; } + return spv_popcount4(masked); +} + +static uint spv_emulate_ballot_exclusive_bit_count(uint4 ballot, uint lane_id) { + if (lane_id == 0u) return 0u; + return spv_emulate_ballot_inclusive_bit_count(ballot, lane_id - 1u); +} + +static uint spv_emulate_ballot_find_lsb(uint4 ballot) { /* Index (0..127) of the lowest set bit across x,y,z,w, or ~0u if no bit is set. (v & (0u - v)) isolates the lowest set bit; clz of that single bit is 31 - index, so the index is 31u - clz(...), plus 32 per preceding word. */ + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x & (0u - ballot.x)); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y & (0u - ballot.y)); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z & (0u - ballot.z)); + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w & (0u - ballot.w)); + return ~0u; +} + +static uint spv_emulate_ballot_find_msb(uint4 ballot) { + if (ballot.w != 0u) return 127u - (uint)clz(ballot.w); + if (ballot.z != 0u) return 95u - (uint)clz(ballot.z); + if (ballot.y != 0u) return 63u - (uint)clz(ballot.y); + if (ballot.x != 0u) return 31u - (uint)clz(ballot.x); + return ~0u; +} + +__attribute__((reqd_work_group_size(256, 1, 1))) +__kernel void comp_main(__global SSBO* _16) +{ + uint3 spvWorkgroupSize = (uint3)(get_local_size(0), get_local_size(1), get_local_size(2)); + uint _spv_linear_workgroup_size = get_local_size(0) * get_local_size(1) * get_local_size(2); + uint _spv_linear_id = (get_local_id(2) * get_local_size(1) * get_local_size(0)) + (get_local_id(1) * get_local_size(0)) + get_local_id(0); + uint _spv_subgroup_size = 32u; + uint _spv_lane_id = _spv_linear_id % 32u; + uint _spv_subgroup_id = _spv_linear_id / 32u; + uint _spv_num_subgroups = _spv_linear_workgroup_size / 32u; + uint _spv_subgroup_base = _spv_subgroup_id * 32u; + __local uint _spv_subgroup_scratch[256]; + bool elected_1 = (_spv_lane_id == 0u); + bool all_eq_float_1 = 
spv_emulate_all_equal_uint(_spv_subgroup_scratch, as_uint(_16->FragColor), _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool all_eq_uint_1 = spv_emulate_all_equal_uint(_spv_subgroup_scratch, _16->udat, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool all_result_1 = spv_emulate_all(_spv_subgroup_scratch, _16->udat > 0u, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + bool any_result_1 = spv_emulate_any(_spv_subgroup_scratch, _16->udat > 0u, _spv_linear_id, _spv_subgroup_base, _spv_subgroup_size); + _16->FragColor = ((((float)(elected_1) + (float)(all_eq_float_1)) + (float)(all_eq_uint_1)) + (float)(all_result_1)) + (float)(any_result_1); +} + diff --git a/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..31e7e9c89 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-arithmetic.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,39 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_arithmetic : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +void main() +{ + // Mul (non-uniform arithmetic extension) + float fmul = subgroupMul(FragColor); + int imul = subgroupMul(idat); + uint umul = subgroupMul(udat); + + // Bitwise + uint band = subgroupAnd(udat); + uint bor = subgroupOr(udat); + uint bxor = subgroupXor(udat); + + // Logical + bool land = subgroupAnd(udat > 0u); + bool lor = subgroupOr(udat > 0u); + bool lxor = subgroupXor(udat > 0u); + + // Inclusive mul + float fmul_inc = subgroupInclusiveMul(FragColor); + // Exclusive mul + float fmul_exc = subgroupExclusiveMul(FragColor); + + FragColor = fmul + fmul_inc + fmul_exc + + float(imul + int(umul)) + + float(band + bor + bxor) + + float(land) + float(lor) + float(lxor); +} diff --git 
a/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..a2ad427a7 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-ballot.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,43 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_ballot : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + uint udat; +}; + +void main() +{ + // Ballot + uvec4 ballot = subgroupBallot(gl_SubgroupInvocationID < 16u); + + // BroadcastFirst + float first = subgroupBroadcastFirst(FragColor); + + // Ballot bit ops + bool extracted = subgroupBallotBitExtract(ballot, 5u); + uint bit_count = subgroupBallotBitCount(ballot); + uint inclusive_count = subgroupBallotInclusiveBitCount(ballot); + uint exclusive_count = subgroupBallotExclusiveBitCount(ballot); + uint find_lsb = subgroupBallotFindLSB(ballot); + uint find_msb = subgroupBallotFindMSB(ballot); + + // InverseBallot + bool inv_ballot = subgroupInverseBallot(ballot); + + // Mask builtins + uvec4 eq_mask = gl_SubgroupEqMask; + uvec4 ge_mask = gl_SubgroupGeMask; + uvec4 gt_mask = gl_SubgroupGtMask; + uvec4 le_mask = gl_SubgroupLeMask; + uvec4 lt_mask = gl_SubgroupLtMask; + + FragColor = first + + float(ballot.x + ballot.y + ballot.z + ballot.w) + + float(bit_count + inclusive_count + exclusive_count + find_lsb + find_msb) + + float(extracted) + float(inv_ballot) + + float(eq_mask.x) + float(ge_mask.x) + float(gt_mask.x) + float(le_mask.x) + float(lt_mask.x); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..213043519 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,77 @@ 
+#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_arithmetic : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +float helper(float val) +{ + float reduced = subgroupAdd(val); + bool elected = subgroupElect(); + return elected ? reduced : 0.0; +} + +void main() +{ + // Builtins + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + + // Elect + bool elected = subgroupElect(); + + // Barriers + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + + // Vote (uniform) + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + + // Broadcast + uint broadcasted = subgroupBroadcast(42u, 0u); + + // Reduce + float fadd = subgroupAdd(FragColor); + int iadd = subgroupAdd(idat); + float fmin = subgroupMin(FragColor); + float fmax = subgroupMax(FragColor); + int smin = subgroupMin(idat); + int smax = subgroupMax(idat); + uint umin = subgroupMin(udat); + uint umax = subgroupMax(udat); + + // Inclusive scan + float finc_add = subgroupInclusiveAdd(FragColor); + int iinc_add = subgroupInclusiveAdd(idat); + uint uinc_min = subgroupInclusiveMin(udat); + uint uinc_max = subgroupInclusiveMax(udat); + + // Exclusive scan + float fexc_add = subgroupExclusiveAdd(FragColor); + int iexc_add = subgroupExclusiveAdd(idat); + uint uexc_min = subgroupExclusiveMin(udat); + uint uexc_max = subgroupExclusiveMax(udat); + + // Call helper function that uses subgroup ops + float from_helper = helper(FragColor); + + // Write results to prevent dead-code elimination + FragColor = fadd + fmin + fmax + finc_add + fexc_add + + float(iadd + smin + smax + 
iinc_add + iexc_add) + + float(umin + umax + uinc_min + uinc_max + uexc_min + uexc_max + broadcasted) + + float(has_all) + float(has_any) + float(elected) + from_helper; +} diff --git a/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp new file mode 100644 index 000000000..213043519 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-basic.nocompat.vk.subgroups.subgroups-emulate.comp @@ -0,0 +1,77 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require +#extension GL_KHR_shader_subgroup_arithmetic : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +float helper(float val) +{ + float reduced = subgroupAdd(val); + bool elected = subgroupElect(); + return elected ? 
reduced : 0.0; +} + +void main() +{ + // Builtins + FragColor = float(gl_NumSubgroups); + FragColor = float(gl_SubgroupID); + FragColor = float(gl_SubgroupSize); + FragColor = float(gl_SubgroupInvocationID); + + // Elect + bool elected = subgroupElect(); + + // Barriers + subgroupBarrier(); + subgroupMemoryBarrier(); + subgroupMemoryBarrierBuffer(); + subgroupMemoryBarrierShared(); + subgroupMemoryBarrierImage(); + + // Vote (uniform) + bool has_all = subgroupAll(true); + bool has_any = subgroupAny(true); + + // Broadcast + uint broadcasted = subgroupBroadcast(42u, 0u); + + // Reduce + float fadd = subgroupAdd(FragColor); + int iadd = subgroupAdd(idat); + float fmin = subgroupMin(FragColor); + float fmax = subgroupMax(FragColor); + int smin = subgroupMin(idat); + int smax = subgroupMax(idat); + uint umin = subgroupMin(udat); + uint umax = subgroupMax(udat); + + // Inclusive scan + float finc_add = subgroupInclusiveAdd(FragColor); + int iinc_add = subgroupInclusiveAdd(idat); + uint uinc_min = subgroupInclusiveMin(udat); + uint uinc_max = subgroupInclusiveMax(udat); + + // Exclusive scan + float fexc_add = subgroupExclusiveAdd(FragColor); + int iexc_add = subgroupExclusiveAdd(idat); + uint uexc_min = subgroupExclusiveMin(udat); + uint uexc_max = subgroupExclusiveMax(udat); + + // Call helper function that uses subgroup ops + float from_helper = helper(FragColor); + + // Write results to prevent dead-code elimination + FragColor = fadd + fmin + fmax + finc_add + fexc_add + + float(iadd + smin + smax + iinc_add + iexc_add) + + float(umin + umax + uinc_min + uinc_max + uexc_min + uexc_max + broadcasted) + + float(has_all) + float(has_any) + float(elected) + from_helper; +} diff --git a/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..7161ed2d4 --- /dev/null +++ 
b/shaders-opencl-no-opt/comp/subgroups-clustered.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,34 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_clustered : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + int idat; + uint udat; +}; + +void main() +{ + float cred_add = subgroupClusteredAdd(FragColor, 4u); + float cred_mul = subgroupClusteredMul(FragColor, 4u); + float cred_min = subgroupClusteredMin(FragColor, 4u); + float cred_max = subgroupClusteredMax(FragColor, 4u); + + int cred_iadd = subgroupClusteredAdd(idat, 4u); + uint cred_umin = subgroupClusteredMin(udat, 4u); + + uint cred_and = subgroupClusteredAnd(udat, 4u); + uint cred_or = subgroupClusteredOr(udat, 4u); + uint cred_xor = subgroupClusteredXor(udat, 4u); + + bool cred_land = subgroupClusteredAnd(udat > 0u, 4u); + bool cred_lor = subgroupClusteredOr(udat > 0u, 4u); + + FragColor = cred_add + cred_mul + cred_min + cred_max + + float(cred_iadd) + float(cred_umin) + + float(cred_and + cred_or + cred_xor) + + float(cred_land) + float(cred_lor); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..bd225cf23 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-rotate.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,17 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_rotate : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; +}; + +void main() +{ + uint rotated = subgroupRotate(20u, 4u); + uint rotated_clustered = subgroupClusteredRotate(20u, 4u, 8u); + + FragColor = float(rotated) + float(rotated_clustered); +} diff --git a/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp 
b/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..5afa50288 --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-shuffle.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,22 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + uint udat; +}; + +void main() +{ + // Shuffle + float shuffled = subgroupShuffle(FragColor, 3u); + float xored = subgroupShuffleXor(FragColor, 1u); + float up = subgroupShuffleUp(FragColor, 1u); + float down = subgroupShuffleDown(FragColor, 1u); + + FragColor = shuffled + xored + up + down; +} diff --git a/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp b/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp new file mode 100644 index 000000000..18664539b --- /dev/null +++ b/shaders-opencl-no-opt/comp/subgroups-vote.nocompat.vk.subgroups-emulate.comp @@ -0,0 +1,27 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_vote : require +layout(local_size_x = 256) in; + +layout(std430, binding = 0) buffer SSBO +{ + float FragColor; + uint udat; +}; + +void main() +{ + // Elect + bool elected = subgroupElect(); + + // AllEqual + bool all_eq_float = subgroupAllEqual(FragColor); + bool all_eq_uint = subgroupAllEqual(udat); + + // All / Any + bool all_result = subgroupAll(udat > 0u); + bool any_result = subgroupAny(udat > 0u); + + FragColor = float(elected) + float(all_eq_float) + float(all_eq_uint) + + float(all_result) + float(any_result); +} From ee8cceb21a9cea424f9af39f18cee4bdde1c1e82 Mon Sep 17 00:00:00 2001 From: Garrick Meeker Date: Thu, 19 Mar 2026 07:33:14 -0700 Subject: [PATCH 16/16] OpenCL: Fix --opencl-subgroups-all --- main.cpp | 2 +- 
spirv_opencl.cpp | 20 -------------------- spirv_opencl.hpp | 3 +++ 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/main.cpp b/main.cpp index bb8163b70..7e7b308ec 100644 --- a/main.cpp +++ b/main.cpp @@ -2007,7 +2007,7 @@ static int main_inner(int argc, char *argv[]) cbs.add("--opencl-fp64", [&args](CLIParser &) { args.opencl_enable_fp64 = true; }); cbs.add("--opencl-64bit-atomics", [&args](CLIParser &) { args.opencl_enable_64bit_atomics = true; }); cbs.add("--opencl-subgroups", [&args](CLIParser &) { args.opencl_enable_subgroups = true; }); - cbs.add("--opencl-subgroups-all", [&args](CLIParser &) { args.opencl_enable_subgroups = true; }); + cbs.add("--opencl-subgroups-all", [&args](CLIParser &) { args.opencl_enable_subgroups_all = true; }); cbs.add("--opencl-emulate-subgroups", [&args](CLIParser &) { args.opencl_emulate_subgroups = true; }); cbs.add("--opencl-fixed-subgroup-size", [&args](CLIParser &parser) { args.opencl_fixed_subgroup_size = parser.next_uint(); }); diff --git a/spirv_opencl.cpp b/spirv_opencl.cpp index b820f447c..38ff7f075 100644 --- a/spirv_opencl.cpp +++ b/spirv_opencl.cpp @@ -1626,25 +1626,6 @@ void CompilerOpenCL::prepass_discover_matrix_types() // Scan all instructions for matrix operations to discover helpers needed. // We can resolve the matrix type from the SPIR-V type of operands at pre-scan time. - auto get_id_type = [&](uint32_t id) -> const SPIRType & - { - // For value IDs, look up the type from variable, constant, or the instruction result. - auto *var = maybe_get(id); - if (var) - return get_variable_data_type(*var); - auto *c = maybe_get(id); - if (c) - return get(c->constant_type); - // For instruction results, the type is stored in the expression or type_id. 
- if (ir.ids[id].get_type() == TypeExpression) - return get(get(id).expression_type); - // For types themselves - if (ir.ids[id].get_type() == TypeType) - return get(id); - // Fallback: check if there's a result type mapping - return get(id); - }; - ir.for_each_typed_id( [&](uint32_t, SPIRFunction &f) { @@ -7058,7 +7039,6 @@ void CompilerOpenCL::emit_instruction(const Instruction &instruction) uint32_t result_id = ops[1]; uint32_t base_id = ops[2]; - auto &base_type = expression_type(base_id); TypeID base_type_id = expression_type_id(base_id); // Check if custom stride pointer arithmetic is needed. diff --git a/spirv_opencl.hpp b/spirv_opencl.hpp index 3dea7039a..ed6c4ff9c 100644 --- a/spirv_opencl.hpp +++ b/spirv_opencl.hpp @@ -90,6 +90,9 @@ class CompilerOpenCL : public CompilerGLSL void set_opencl_options(const Options &opts) { opencl_options = opts; + // subgroups_all implies subgroups is on. + if (opencl_options.enable_subgroups_all) + opencl_options.enable_subgroups = true; } std::string compile() override;