Skip to content
Open
142 changes: 142 additions & 0 deletions test/Feature/HLSLLib/fma.matrix.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#--- source.hlsl

StructuredBuffer<double4> A : register(t0);
StructuredBuffer<double4> B : register(t1);
StructuredBuffer<double4> C : register(t2);

RWStructuredBuffer<double4> Out : register(u3);

// Exercises the fma(a, b, c) intrinsic on double matrices of four shapes
// (3x2, 2x4, 3x1 and 4x4). Operands are built from rows of the A, B and C
// buffers, and every result element is written to Out so the pipeline can
// compare it against the Expected buffer.
[numthreads(1,1,1)]
void main() {
// 3x2 case: six elements taken from the .xyz lanes of rows 0 and 1;
// the .w lanes of those rows are not read.
double3x2 r32 = fma(double3x2(A[0].xyz, A[1].xyz),
double3x2(B[0].xyz, B[1].xyz),
double3x2(C[0].xyz, C[1].xyz));

// 2x4 case: built from the full double4 rows 2 and 3.
double2x4 r24 = fma(double2x4(A[2], A[3]),
double2x4(B[2], B[3]),
double2x4(C[2], C[3]));

// 3x1 case: built from the .xyz lanes of row 4; the .w lane is not read.
double3x1 r31 = fma(double3x1(A[4].xyz),
double3x1(B[4].xyz),
double3x1(C[4].xyz));

// 4x4 case: built from the full double4 rows 5 through 8.
double4x4 r44 = fma(double4x4(A[5], A[6], A[7], A[8]),
double4x4(B[5], B[6], B[7], B[8]),
double4x4(C[5], C[6], C[7], C[8]));

// The three 2-element rows of the 3x2 result are packed into Out[0] and the
// first half of Out[1]; the remaining two lanes of Out[1] are zero padding.
Out[0] = double4(r32[0], r32[1]);
Out[1] = double4(r32[2], 0.0, 0.0);
// Each 4-element row of the 2x4 result maps directly onto one output row.
Out[2] = r24[0];
Out[3] = r24[1];
// The 3x1 result is read element by element; the .w lane is zero padding.
Out[4] = double4(r31[0][0], r31[1][0], r31[2][0], 0.0);
// Each row of the 4x4 result maps directly onto one output row.
Out[5] = r44[0];
Out[6] = r44[1];
Out[7] = r44[2];
Out[8] = r44[3];
}

//--- pipeline.yaml

---
# Pipeline for the fma matrix test: a single compute dispatch of `main` reads
# buffers A, B and C, writes Out, and Out is then compared against Expected.
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]
# Each input buffer holds nine double4 rows (4 x 8-byte doubles = Stride 32).
# Row usage in source.hlsl:
#   rows 0-1 -> double3x2 operands (.xyz lanes only)
#   rows 2-3 -> double2x4 operands
#   row  4   -> double3x1 operands (.xyz lanes only)
#   rows 5-8 -> double4x4 operands
Buffers:
- Name: A
Format: Float64
Stride: 32
Data: [ 1.5, -2.5, 0.5, 0.0,
-0.5, 2.0, 3.0, 0.0,
0.25, -0.25, 10.4, -10.6,
0.0, 10.0, -10.0, 10.5,
1.25, -4.0, 2.5, 0.0,
1.0, 2.0, 3.0, 4.0,
-1.0, -2.0, -3.0, -4.0,
0.5, 1.5, -0.5, -1.5,
10.0, -10.0, 2.25, -2.25 ]
- Name: B
Format: Float64
Stride: 32
Data: [ 2.0, -1.0, 4.0, 0.0,
4.0, -0.5, 1.5, 0.0,
1.0, 1.0, 1.0, 1.0,
1.0, 2.0, -1.0, 1.0,
2.0, -0.5, 3.0, 0.0,
2.0, -1.0, 0.5, 3.0,
4.0, 0.5, -2.0, -1.0,
1.0, 2.0, 3.0, 4.0,
-0.5, 1.5, -1.0, 2.0 ]
- Name: C
Format: Float64
Stride: 32
Data: [ 0.25, 0.75, -1.5, 0.0,
1.5, 1.0, -2.0, 0.0,
0.0, 0.0, 0.0, 0.0,
0.0, 0.5, 0.5, 0.0,
0.5, 1.0, -1.5, 0.0,
0.5, 1.0, -1.5, 0.0,
-0.5, 2.0, 1.0, 3.0,
1.5, -2.0, 0.5, -4.0,
5.0, -5.0, 0.25, -0.25 ]
# Output buffer: 9 rows * 32-byte stride = 288 bytes, one row per Out[i] store.
- Name: Out
Format: Float64
Stride: 32
FillSize: 288
# Expected holds A*B+C for every lane the shader computes, in the order the
# shader packs results into Out; padding lanes (Out[1].zw, Out[4].w) are 0.0.
- Name: Expected
Format: Float64
Stride: 32
Data: [ 3.25, 3.25, 0.5, -0.5,
0.0, 2.5, 0.0, 0.0,
0.25, -0.25, 10.4, -10.6,
0.0, 20.5, 10.5, 10.5,
3.0, 3.0, 6.0, 0.0,
2.5, -1.0, 0.0, 12.0,
-4.5, 1.0, 7.0, 7.0,
2.0, 1.0, -1.0, -10.0,
0.0, -20.0, -2.0, -4.75 ]
# NOTE(review): ULPT: 0 presumably means a zero-ULP tolerance, i.e. Out must
# match Expected exactly - confirm against the offloader documentation.
Results:
- Result: Result
Rule: BufferFloatULP
ULPT: 0
Actual: Out
Expected: Expected
# Bindings mirror the register(t0), register(t1), register(t2) and
# register(u3) declarations in source.hlsl.
DescriptorSets:
- Resources:
- Name: A
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
- Name: B
Kind: StructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: C
Kind: StructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
#--- end

# Unimplemented https://github.com/llvm/llvm-project/issues/99117
# XFAIL: Clang

# REQUIRES: Double
# RUN: split-file %s %t
# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
98 changes: 98 additions & 0 deletions test/Feature/HLSLLib/fma.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#--- source.hlsl

StructuredBuffer<double4> A : register(t0);
StructuredBuffer<double4> B : register(t1);
StructuredBuffer<double4> C : register(t2);

RWStructuredBuffer<double4> Out : register(u3);

// Exercises the fma(a, b, c) intrinsic on double operands of every vector
// width (double4, double3, double2) plus a scalar, and once with all-literal
// operands. Each result is written to one row of Out for comparison against
// the Expected buffer.
[numthreads(1,1,1)]
void main() {
// double4 operands read straight from row 0 of each buffer.
Out[0] = fma(A[0], B[0], C[0]);
// Row 1: a double3 call on .xyz and a scalar call on .w, repacked as double4.
Out[1] = double4(fma(A[1].xyz, B[1].xyz, C[1].xyz), fma(A[1].w, B[1].w, C[1].w));
// Row 2: two double2 calls, on .xy and on .zw, repacked as double4.
Out[2] = double4(fma(A[2].xy, B[2].xy, C[2].xy), fma(A[2].zw, B[2].zw, C[2].zw));
// All operands are literals, so this row does not depend on any input buffer.
// NOTE(review): presumably intended to cover compile-time evaluation - confirm.
Out[3] = fma(double4(0.25, -0.25, 10.0, -10.0), double4(1.0, 1.0, 1.0, 1.0), double4(0.0, 0.0, 0.0, 0.0));
}

//--- pipeline.yaml

---
# Pipeline for the fma vector/scalar test: a single compute dispatch of `main`.
Shaders:
- Stage: Compute
Entry: main
DispatchSize: [1, 1, 1]
# Each input buffer holds three double4 rows (4 x 8-byte doubles = Stride 32).
# Row usage in source.hlsl:
#   row 0 -> one double4 fma
#   row 1 -> double3 fma on .xyz plus scalar fma on .w
#   row 2 -> two double2 fmas (.xy and .zw)
Buffers:
- Name: A
Format: Float64
Stride: 32
Data: [ 0.25, -0.25, 10.4, -10.6,
1.5, -2.5, 0.5, -0.5,
0.0, 10.0, -10.0, 10.5 ]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to use more meaningful values in these tests. The purpose of fma is that it's more precise than just a*b+c: it rounds only once, at the end of the operation, whereas a*b+c rounds twice (once after the multiply and once after the add). Your current test values are mostly trivial (there are lots of x*1.0+0.0 identity operations), and every product and sum they produce is exact in fp64, so we're not testing any of the intermediate precision loss that matters with fma.

Having some simple tests is important, but I do think we need some more robust ones here.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I need to do this for the matrix test too, or is it fine to do it only here?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should get opinions on this from other people who are more familiar with matrices. But from my understanding, matrix tests in general are more about verifying dimensions/shapes, so they don't have to be as comprehensive as the scalar/vector ones.

# B (multiplier) and C (addend) rows line up one-to-one with the rows of A.
- Name: B
Format: Float64
Stride: 32
Data: [ 1.0, 1.0, 1.0, 1.0,
2.0, -1.0, 4.0, 4.0,
1.0, 2.0, -1.0, 1.0 ]
- Name: C
Format: Float64
Stride: 32
Data: [ 0.0, 0.0, 0.0, 0.0,
0.25, 0.75, -1.5, 1.5,
0.0, 0.5, 0.5, 0.0 ]
# Output buffer: 4 rows * 32-byte stride = 128 bytes, one row per Out[i] store.
- Name: Out
Format: Float64
Stride: 32
FillSize: 128
# Rows 0-2 are A*B+C for the matching input rows; row 3 is the result of the
# all-literal fma call in source.hlsl and does not depend on A, B or C.
- Name: Expected
Format: Float64
Stride: 32
Data: [ 0.25, -0.25, 10.4, -10.6,
3.25, 3.25, 0.5, -0.5,
0.0, 20.5, 10.5, 10.5,
0.25, -0.25, 10.0, -10.0 ]
# NOTE(review): ULPT: 0 presumably means a zero-ULP tolerance, i.e. Out must
# match Expected exactly - confirm against the offloader documentation.
Results:
- Result: Result
Rule: BufferFloatULP
ULPT: 0
Actual: Out
Expected: Expected
# Bindings mirror the register(t0), register(t1), register(t2) and
# register(u3) declarations in source.hlsl.
DescriptorSets:
- Resources:
- Name: A
Kind: StructuredBuffer
DirectXBinding:
Register: 0
Space: 0
VulkanBinding:
Binding: 0
- Name: B
Kind: StructuredBuffer
DirectXBinding:
Register: 1
Space: 0
VulkanBinding:
Binding: 1
- Name: C
Kind: StructuredBuffer
DirectXBinding:
Register: 2
Space: 0
VulkanBinding:
Binding: 2
- Name: Out
Kind: RWStructuredBuffer
DirectXBinding:
Register: 3
Space: 0
VulkanBinding:
Binding: 3
#--- end

# Unimplemented https://github.com/llvm/llvm-project/issues/99117
# XFAIL: Clang

# REQUIRES: Double
# RUN: split-file %s %t
# RUN: %dxc_target -Gis -HV 202x -T cs_6_5 -Fo %t.o %t/source.hlsl
# RUN: %offloader %t/pipeline.yaml %t.o
Loading