|
| 1 | +#--- source.hlsl |
| 2 | +// Signed-int case: In (t0) is the per-thread input; Out1..Out4 (u1-u4) receive the scalar, 2-, 3- and 4-component results respectively. |
| 3 | +StructuredBuffer<int4> In : register(t0); |
| 4 | +RWStructuredBuffer<int4> Out1 : register(u1); |
| 5 | +RWStructuredBuffer<int4> Out2 : register(u2); |
| 6 | +RWStructuredBuffer<int4> Out3 : register(u3); |
| 7 | +RWStructuredBuffer<int4> Out4 : register(u4); |
| 8 | + |
| 9 | +// Unsigned-int case: UIn (t5) is the per-thread input; UOut1..UOut4 (u6-u9) receive the scalar, 2-, 3- and 4-component results respectively. |
| 10 | +StructuredBuffer<uint4> UIn : register(t5); |
| 11 | +RWStructuredBuffer<uint4> UOut1 : register(u6); |
| 12 | +RWStructuredBuffer<uint4> UOut2 : register(u7); |
| 13 | +RWStructuredBuffer<uint4> UOut3 : register(u8); |
| 14 | +RWStructuredBuffer<uint4> UOut4 : register(u9); |
| 15 | + |
| 16 | +// Float case: FIn (t10) is the per-thread input; FOut1..FOut4 (u11-u14) receive the scalar, 2-, 3- and 4-component results respectively. |
| 17 | +StructuredBuffer<float4> FIn : register(t10); |
| 18 | +RWStructuredBuffer<float4> FOut1 : register(u11); |
| 19 | +RWStructuredBuffer<float4> FOut2 : register(u12); |
| 20 | +RWStructuredBuffer<float4> FOut3 : register(u13); |
| 21 | +RWStructuredBuffer<float4> FOut4 : register(u14); |
| 22 | + |
| 23 | +[numthreads(2,2,1)] |
| 24 | +void main(uint3 dtid : SV_DispatchThreadID) { |
| 25 | + uint index = dtid.y * 2 + dtid.x; |
| 26 | + |
| 27 | + // int case: apply QuadReadAcrossDiagonal to the scalar, 2-, 3- and 4-component forms of In[index]; each result is written only to the matching components of Out1..Out4[index], and the pipeline expects element i to hold input row 3 - i. |
| 28 | + int4 v = In[index]; |
| 29 | + int scalar = QuadReadAcrossDiagonal(v.x); |
| 30 | + int2 vec2 = QuadReadAcrossDiagonal(v.xy); |
| 31 | + int3 vec3 = QuadReadAcrossDiagonal(v.xyz); |
| 32 | + int4 vec4 = QuadReadAcrossDiagonal(v); |
| 33 | + |
| 34 | + Out1[index].x = scalar; |
| 35 | + Out2[index].xy = vec2; |
| 36 | + Out3[index].xyz = vec3; |
| 37 | + Out4[index] = vec4; |
| 38 | + |
| 39 | + // uint case: the same four QuadReadAcrossDiagonal forms applied to UIn[index], written only to the matching components of UOut1..UOut4[index]. |
| 40 | + uint4 uv = UIn[index]; |
| 41 | + uint uscalar = QuadReadAcrossDiagonal(uv.x); |
| 42 | + uint2 uvec2 = QuadReadAcrossDiagonal(uv.xy); |
| 43 | + uint3 uvec3 = QuadReadAcrossDiagonal(uv.xyz); |
| 44 | + uint4 uvec4 = QuadReadAcrossDiagonal(uv); |
| 45 | + |
| 46 | + UOut1[index].x = uscalar; |
| 47 | + UOut2[index].xy = uvec2; |
| 48 | + UOut3[index].xyz = uvec3; |
| 49 | + UOut4[index] = uvec4; |
| 50 | + |
| 51 | + // float case: the same four QuadReadAcrossDiagonal forms applied to FIn[index], written only to the matching components of FOut1..FOut4[index]. |
| 52 | + float4 fv = FIn[index]; |
| 53 | + float fscalar = QuadReadAcrossDiagonal(fv.x); |
| 54 | + float2 fvec2 = QuadReadAcrossDiagonal(fv.xy); |
| 55 | + float3 fvec3 = QuadReadAcrossDiagonal(fv.xyz); |
| 56 | + float4 fvec4 = QuadReadAcrossDiagonal(fv); |
| 57 | + |
| 58 | + FOut1[index].x = fscalar; |
| 59 | + FOut2[index].xy = fvec2; |
| 60 | + FOut3[index].xyz = fvec3; |
| 61 | + FOut4[index] = fvec4; |
| 62 | +} |
| 63 | + |
| 64 | +#--- pipeline.yaml |
| 65 | + |
| 66 | +--- |
| 67 | +Shaders: |
| 68 | + - Stage: Compute |
| 69 | + Entry: main |
| 70 | + DispatchSize: [ 1, 1, 1 ] |
| 71 | +Buffers: |
| 72 | + - Name: In |
| 73 | + Format: Int32 |
| 74 | + Stride: 16 |
| 75 | + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] |
| 76 | + - Name: Out1 |
| 77 | + Format: Int32 |
| 78 | + Stride: 16 |
| 79 | + FillSize: 64 |
| 80 | + - Name: Out2 |
| 81 | + Format: Int32 |
| 82 | + Stride: 16 |
| 83 | + FillSize: 64 |
| 84 | + - Name: Out3 |
| 85 | + Format: Int32 |
| 86 | + Stride: 16 |
| 87 | + FillSize: 64 |
| 88 | + - Name: Out4 |
| 89 | + Format: Int32 |
| 90 | + Stride: 16 |
| 91 | + FillSize: 64 |
| 92 | + - Name: ExpectedOut1 |
| 93 | + Format: Int32 |
| 94 | + Stride: 16 |
| 95 | + Data: [ 13, 0, 0, 0, 9, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0 ] |
| 96 | + - Name: ExpectedOut2 |
| 97 | + Format: Int32 |
| 98 | + Stride: 16 |
| 99 | + Data: [ 13, 14, 0, 0, 9, 10, 0, 0, 5, 6, 0, 0, 1, 2, 0, 0 ] |
| 100 | + - Name: ExpectedOut3 |
| 101 | + Format: Int32 |
| 102 | + Stride: 16 |
| 103 | + Data: [ 13, 14, 15, 0, 9, 10, 11, 0, 5, 6, 7, 0, 1, 2, 3, 0 ] |
| 104 | + - Name: ExpectedOut4 |
| 105 | + Format: Int32 |
| 106 | + Stride: 16 |
| 107 | + Data: [ 13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 1, 2, 3, 4 ] |
| 108 | + - Name: UIn |
| 109 | + Format: UInt32 |
| 110 | + Stride: 16 |
| 111 | + Data: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ] |
| 112 | + - Name: UOut1 |
| 113 | + Format: UInt32 |
| 114 | + Stride: 16 |
| 115 | + FillSize: 64 |
| 116 | + - Name: UOut2 |
| 117 | + Format: UInt32 |
| 118 | + Stride: 16 |
| 119 | + FillSize: 64 |
| 120 | + - Name: UOut3 |
| 121 | + Format: UInt32 |
| 122 | + Stride: 16 |
| 123 | + FillSize: 64 |
| 124 | + - Name: UOut4 |
| 125 | + Format: UInt32 |
| 126 | + Stride: 16 |
| 127 | + FillSize: 64 |
| 128 | + - Name: UExpectedOut1 |
| 129 | + Format: UInt32 |
| 130 | + Stride: 16 |
| 131 | + Data: [ 13, 0, 0, 0, 9, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0 ] |
| 132 | + - Name: UExpectedOut2 |
| 133 | + Format: UInt32 |
| 134 | + Stride: 16 |
| 135 | + Data: [ 13, 14, 0, 0, 9, 10, 0, 0, 5, 6, 0, 0, 1, 2, 0, 0 ] |
| 136 | + - Name: UExpectedOut3 |
| 137 | + Format: UInt32 |
| 138 | + Stride: 16 |
| 139 | + Data: [ 13, 14, 15, 0, 9, 10, 11, 0, 5, 6, 7, 0, 1, 2, 3, 0 ] |
| 140 | + - Name: UExpectedOut4 |
| 141 | + Format: UInt32 |
| 142 | + Stride: 16 |
| 143 | + Data: [ 13, 14, 15, 16, 9, 10, 11, 12, 5, 6, 7, 8, 1, 2, 3, 4 ] |
| 144 | + - Name: FIn |
| 145 | + Format: Float32 |
| 146 | + Stride: 16 |
| 147 | + Data: [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 ] |
| 148 | + - Name: FOut1 |
| 149 | + Format: Float32 |
| 150 | + Stride: 16 |
| 151 | + FillSize: 64 |
| 152 | + - Name: FOut2 |
| 153 | + Format: Float32 |
| 154 | + Stride: 16 |
| 155 | + FillSize: 64 |
| 156 | + - Name: FOut3 |
| 157 | + Format: Float32 |
| 158 | + Stride: 16 |
| 159 | + FillSize: 64 |
| 160 | + - Name: FOut4 |
| 161 | + Format: Float32 |
| 162 | + Stride: 16 |
| 163 | + FillSize: 64 |
| 164 | + - Name: FExpectedOut1 |
| 165 | + Format: Float32 |
| 166 | + Stride: 16 |
| 167 | + Data: [ 13.0, 0.0, 0.0, 0.0, 9.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 ] |
| 168 | + - Name: FExpectedOut2 |
| 169 | + Format: Float32 |
| 170 | + Stride: 16 |
| 171 | + Data: [ 13.0, 14.0, 0.0, 0.0, 9.0, 10.0, 0.0, 0.0, 5.0, 6.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0 ] |
| 172 | + - Name: FExpectedOut3 |
| 173 | + Format: Float32 |
| 174 | + Stride: 16 |
| 175 | + Data: [ 13.0, 14.0, 15.0, 0.0, 9.0, 10.0, 11.0, 0.0, 5.0, 6.0, 7.0, 0.0, 1.0, 2.0, 3.0, 0.0 ] |
| 176 | + - Name: FExpectedOut4 |
| 177 | + Format: Float32 |
| 178 | + Stride: 16 |
| 179 | + Data: [ 13.0, 14.0, 15.0, 16.0, 9.0, 10.0, 11.0, 12.0, 5.0, 6.0, 7.0, 8.0, 1.0, 2.0, 3.0, 4.0 ] |
| 180 | +Results: |
| 181 | + - Result: ExpectedOut1 |
| 182 | + Rule: BufferExact |
| 183 | + Actual: Out1 |
| 184 | + Expected: ExpectedOut1 |
| 185 | + - Result: ExpectedOut2 |
| 186 | + Rule: BufferExact |
| 187 | + Actual: Out2 |
| 188 | + Expected: ExpectedOut2 |
| 189 | + - Result: ExpectedOut3 |
| 190 | + Rule: BufferExact |
| 191 | + Actual: Out3 |
| 192 | + Expected: ExpectedOut3 |
| 193 | + - Result: ExpectedOut4 |
| 194 | + Rule: BufferExact |
| 195 | + Actual: Out4 |
| 196 | + Expected: ExpectedOut4 |
| 197 | + - Result: UExpectedOut1 |
| 198 | + Rule: BufferExact |
| 199 | + Actual: UOut1 |
| 200 | + Expected: UExpectedOut1 |
| 201 | + - Result: UExpectedOut2 |
| 202 | + Rule: BufferExact |
| 203 | + Actual: UOut2 |
| 204 | + Expected: UExpectedOut2 |
| 205 | + - Result: UExpectedOut3 |
| 206 | + Rule: BufferExact |
| 207 | + Actual: UOut3 |
| 208 | + Expected: UExpectedOut3 |
| 209 | + - Result: UExpectedOut4 |
| 210 | + Rule: BufferExact |
| 211 | + Actual: UOut4 |
| 212 | + Expected: UExpectedOut4 |
| 213 | + - Result: FExpectedOut1 |
| 214 | + Rule: BufferExact |
| 215 | + Actual: FOut1 |
| 216 | + Expected: FExpectedOut1 |
| 217 | + - Result: FExpectedOut2 |
| 218 | + Rule: BufferExact |
| 219 | + Actual: FOut2 |
| 220 | + Expected: FExpectedOut2 |
| 221 | + - Result: FExpectedOut3 |
| 222 | + Rule: BufferExact |
| 223 | + Actual: FOut3 |
| 224 | + Expected: FExpectedOut3 |
| 225 | + - Result: FExpectedOut4 |
| 226 | + Rule: BufferExact |
| 227 | + Actual: FOut4 |
| 228 | + Expected: FExpectedOut4 |
| 229 | +DescriptorSets: |
| 230 | + - Resources: |
| 231 | + - Name: In |
| 232 | + Kind: StructuredBuffer |
| 233 | + DirectXBinding: |
| 234 | + Register: 0 |
| 235 | + Space: 0 |
| 236 | + VulkanBinding: |
| 237 | + Binding: 0 |
| 238 | + - Name: Out1 |
| 239 | + Kind: RWStructuredBuffer |
| 240 | + DirectXBinding: |
| 241 | + Register: 1 |
| 242 | + Space: 0 |
| 243 | + VulkanBinding: |
| 244 | + Binding: 1 |
| 245 | + - Name: Out2 |
| 246 | + Kind: RWStructuredBuffer |
| 247 | + DirectXBinding: |
| 248 | + Register: 2 |
| 249 | + Space: 0 |
| 250 | + VulkanBinding: |
| 251 | + Binding: 2 |
| 252 | + - Name: Out3 |
| 253 | + Kind: RWStructuredBuffer |
| 254 | + DirectXBinding: |
| 255 | + Register: 3 |
| 256 | + Space: 0 |
| 257 | + VulkanBinding: |
| 258 | + Binding: 3 |
| 259 | + - Name: Out4 |
| 260 | + Kind: RWStructuredBuffer |
| 261 | + DirectXBinding: |
| 262 | + Register: 4 |
| 263 | + Space: 0 |
| 264 | + VulkanBinding: |
| 265 | + Binding: 4 |
| 266 | + - Name: UIn |
| 267 | + Kind: StructuredBuffer |
| 268 | + DirectXBinding: |
| 269 | + Register: 5 |
| 270 | + Space: 0 |
| 271 | + VulkanBinding: |
| 272 | + Binding: 5 |
| 273 | + - Name: UOut1 |
| 274 | + Kind: RWStructuredBuffer |
| 275 | + DirectXBinding: |
| 276 | + Register: 6 |
| 277 | + Space: 0 |
| 278 | + VulkanBinding: |
| 279 | + Binding: 6 |
| 280 | + - Name: UOut2 |
| 281 | + Kind: RWStructuredBuffer |
| 282 | + DirectXBinding: |
| 283 | + Register: 7 |
| 284 | + Space: 0 |
| 285 | + VulkanBinding: |
| 286 | + Binding: 7 |
| 287 | + - Name: UOut3 |
| 288 | + Kind: RWStructuredBuffer |
| 289 | + DirectXBinding: |
| 290 | + Register: 8 |
| 291 | + Space: 0 |
| 292 | + VulkanBinding: |
| 293 | + Binding: 8 |
| 294 | + - Name: UOut4 |
| 295 | + Kind: RWStructuredBuffer |
| 296 | + DirectXBinding: |
| 297 | + Register: 9 |
| 298 | + Space: 0 |
| 299 | + VulkanBinding: |
| 300 | + Binding: 9 |
| 301 | + - Name: FIn |
| 302 | + Kind: StructuredBuffer |
| 303 | + DirectXBinding: |
| 304 | + Register: 10 |
| 305 | + Space: 0 |
| 306 | + VulkanBinding: |
| 307 | + Binding: 10 |
| 308 | + - Name: FOut1 |
| 309 | + Kind: RWStructuredBuffer |
| 310 | + DirectXBinding: |
| 311 | + Register: 11 |
| 312 | + Space: 0 |
| 313 | + VulkanBinding: |
| 314 | + Binding: 11 |
| 315 | + - Name: FOut2 |
| 316 | + Kind: RWStructuredBuffer |
| 317 | + DirectXBinding: |
| 318 | + Register: 12 |
| 319 | + Space: 0 |
| 320 | + VulkanBinding: |
| 321 | + Binding: 12 |
| 322 | + - Name: FOut3 |
| 323 | + Kind: RWStructuredBuffer |
| 324 | + DirectXBinding: |
| 325 | + Register: 13 |
| 326 | + Space: 0 |
| 327 | + VulkanBinding: |
| 328 | + Binding: 13 |
| 329 | + - Name: FOut4 |
| 330 | + Kind: RWStructuredBuffer |
| 331 | + DirectXBinding: |
| 332 | + Register: 14 |
| 333 | + Space: 0 |
| 334 | + VulkanBinding: |
| 335 | + Binding: 14 |
| 336 | + |
| 337 | +... |
| 338 | +#--- end |
| 339 | + |
| 340 | +# Unsupported in Clang: QuadReadAcrossDiagonal intrinsic support currently exists only on a working branch. |
| 341 | +# The PR for it cannot be opened until https://github.com/llvm/llvm-project/pull/187440 is merged. |
| 342 | +# XFAIL: Clang |
| 343 | + |
| 344 | +# Bug: https://github.com/llvm/offload-test-suite/issues/986 |
| 345 | +# XFAIL: Intel && Vulkan && DXC |
| 346 | + |
| 347 | +# Bug: https://github.com/llvm/offload-test-suite/issues/989 |
| 348 | +# XFAIL: Metal |
| 349 | + |
| 350 | +# RUN: split-file %s %t |
| 351 | +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl |
| 352 | +# RUN: %offloader %t/pipeline.yaml %t.o |
0 commit comments