From 15dfc61a0cfe0e958538240d3569a64044f975e3 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Tue, 30 Dec 2025 13:57:06 -0500 Subject: [PATCH 01/10] Allow `convolve!` with real arrays --- src/convolution.jl | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/src/convolution.jl b/src/convolution.jl index b4a8df19..71975852 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -1,5 +1,25 @@ +#= +convolution.jl +Convolution (aka interpolation) methods and their adjoints. +These are core NFFT operations. + +The `convolve!` method allows both real and complex types +because sampling density compensation works with real vectors. +=# + +#= +using NFFT: NFFTPlan, @cthreads, @nloops_ +using LinearAlgebra: mul! +import AbstractNFFTs +=# + + +function AbstractNFFTs.convolve!( + p::NFFTPlan{Tp,D,1}, + g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, + fHat::StridedVector{U}, +) where {D, Tp <: Real, Tg <: Real, U} -function AbstractNFFTs.convolve!(p::NFFTPlan{T,D,1}, g::AbstractArray{Complex{T},D}, fHat::StridedVector{U}) where {D,T,U} if isempty(p.B) if p.params.blocking convolve_blocking!(p, g, fHat) @@ -27,11 +47,20 @@ function _convolve_nonblocking!(p::NFFTPlan, g, fHat, L, winPoly) end -@generated function _convolve_nonblocking(p::NFFTPlan{T,D,1}, g::AbstractArray{Complex{T},D}, L::Val{Z}, winPoly, scale, j) where {D,T,Z} +@generated function _convolve_nonblocking( + p::NFFTPlan{Tp,D,1}, + g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, + L::Val{Z}, + winPoly, + scale, + j, +) where {D, Tp <: Real, Tg <: Real, Z} + quote @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) + T = promote_type(Tp, Tg) fHat = zero(Complex{T}) @nexprs 1 d -> prodWin_{$D} = one(T) @@ -47,11 +76,17 @@ end end end -function convolve_sparse_matrix!(p::NFFTPlan{T,D,1}, g::AbstractArray{Complex{T},D}, fHat::StridedVector{U}) where {D,T,U} +function convolve_sparse_matrix!( + p::NFFTPlan{Tp,D,1}, + g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, + fHat::StridedVector{U}, +) where {D, Tp <: Real, Tg <: Real, U} + threaded_mul!(fHat, transpose(p.B), vec(g)) #mul!(fHat, transpose(p.B), vec(g)) end + ########## convolve adjoint ########## function AbstractNFFTs.convolve_transpose!(p::NFFTPlan{T,D,1}, fHat::AbstractVector{U}, g::StridedArray{Complex{T},D}) where {D,T,U} From 9f7655f2aae384b190c2e9730053946f967b3efe Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Tue, 30 Dec 2025 14:17:33 -0500 Subject: [PATCH 02/10] Allow adjoint to work with real vectors too --- src/convolution.jl | 154 ++++++++++++++++++++++++++------------------- 1 file changed, 91 insertions(+), 63 deletions(-) diff --git a/src/convolution.jl b/src/convolution.jl index 71975852..9c391302 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -3,8 +3,8 @@ convolution.jl Convolution (aka interpolation) methods and their adjoints. These are core NFFT operations. -The `convolve!` method allows both real and complex types -because sampling density compensation works with real vectors. +The methods allow both real and complex types +because sampling density compensation vectors are real. =# #= @@ -29,15 +29,17 @@ function AbstractNFFTs.convolve!( else convolve_sparse_matrix!(p, g, fHat) end - return + return fHat end + function convolve_nonblocking!(p::NFFTPlan, g, fHat) L = Val(2*p.params.m) winPoly = makePolyArrayStatic(p) _convolve_nonblocking!(p, g, fHat, L, winPoly) end + function _convolve_nonblocking!(p::NFFTPlan, g, fHat, L, winPoly) scale = Int(p.params.LUTSize/(p.params.m)) @@ -57,9 +59,9 @@ end ) where {D, Tp <: Real, Tg <: Real, Z} quote - @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, + @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) - + T = promote_type(Tp, Tg) fHat = zero(Complex{T}) @@ -67,7 +69,7 @@ end @nloops_ $D l d -> 1:$Z d->begin # preexpr prodWin_{d-1} = prodWin_d * tmpWin_d[l_d] - gidx_d = tmpIdx_d[l_d] + gidx_d = tmpIdx_d[l_d] end begin # bodyexpr fHat += prodWin_0 * @nref $D g gidx @@ -76,6 +78,7 @@ end end end + function convolve_sparse_matrix!( p::NFFTPlan{Tp,D,1}, g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, @@ -89,7 +92,12 @@ end ########## convolve adjoint ########## -function AbstractNFFTs.convolve_transpose!(p::NFFTPlan{T,D,1}, fHat::AbstractVector{U}, g::StridedArray{Complex{T},D}) where {D,T,U} +function AbstractNFFTs.convolve_transpose!( + p::NFFTPlan{Tp,D,1}, + fHat::AbstractVector{U}, + g::StridedArray{<: Union{Tg, Complex{Tg}}, D}, +) where {D, Tp <: Real, Tg <: Real, U} + if isempty(p.B) if p.params.blocking convolve_transpose_blocking!(p, fHat, g) @@ -99,6 +107,7 @@ function AbstractNFFTs.convolve_transpose!(p::NFFTPlan{T,D,1}, fHat::AbstractVec else convolve_transpose_sparse_matrix!(p, fHat, g) end + return g end @@ -109,7 +118,7 @@ function convolve_transpose_nonblocking!(p::NFFTPlan, fHat, g) end -function _convolve_transpose_nonblocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D,T} +function _convolve_transpose_nonblocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D,T} fill!(g, zero(T)) scale = Int(p.params.LUTSize/(p.params.m)) @@ -119,25 +128,40 @@ function _convolve_transpose_nonblocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPol end -@generated function __convolve_transpose_nonblocking!(p::NFFTPlan{T,D,1}, fHat::AbstractVector{U}, g::StridedArray{Complex{T},D}, L::Val{Z}, winPoly, scale, j) where {D,T,U,Z} +@generated function __convolve_transpose_nonblocking!( + p::NFFTPlan{Tp,D,1}, + fHat::AbstractVector{U}, + g::StridedArray{<: Union{Tg, Complex{Tg}}, D}, + L::Val{Z}, + winPoly, + scale, + j, +) where {D, Tp <: Real, Tg <: Real, U, Z} + quote - @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, + @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) - @nexprs 1 d -> prodWin_{$D} = one(T) + T = promote_type(Tp, Tg) + @nexprs 1 d -> prodWin_{$D} = one(Tp) @nloops_ $D l d -> 1:$Z d->begin # preexpr prodWin_{d-1} = prodWin_d * tmpWin_d[l_d] - gidx_d = tmpIdx_d[l_d] + gidx_d = tmpIdx_d[l_d] end begin # bodyexpr - (@nref $D g gidx) += prodWin_0 * fHat[j] + (@nref $D g gidx) += prodWin_0 * fHat[j] end end end -function convolve_transpose_sparse_matrix!(p::NFFTPlan{T,D,1}, - fHat::AbstractVector{U}, g::StridedArray{Complex{T},D}) where {D,T,U} + +function convolve_transpose_sparse_matrix!( + p::NFFTPlan{Tp,D,1}, + fHat::AbstractVector{U}, + g::StridedArray{<:Union{Tg, Complex{Tg}}, D}, +) where {D, Tp <: Real, Tg <: Real, U} + #threaded_mul!(vec(g), p.B, fHat) mul!(vec(g), p.B, fHat) end @@ -171,7 +195,7 @@ function _convolve_blocking!(p::NFFTPlan{T,D,1}, g, fHat, L, winPoly) where {D,T if !isempty(p.nodesInBlock[l]) toBlock!(p, g, p.blocks[l], p.blockOffsets[l]) winTensor = (p.params.precompute == TENSOR) ? p.windowTensor[l] : nothing - calcOneBlock!(p, fHat, p.nodesInBlock[l], p.blocks[l], p.blockOffsets[l], L, + calcOneBlock!(p, fHat, p.nodesInBlock[l], p.blocks[l], p.blockOffsets[l], L, scale, p.idxInBlock[l], winTensor, winPoly) end end @@ -180,8 +204,8 @@ end @generated function toBlock!(p::NFFTPlan{T,D,1}, g, block, off) where {T,D} quote if off[1] + 2 < 1 - LA = -(off[1] + 2) + 1 - offA = size(g,1) - LA + LA = -(off[1] + 2) + 1 + offA = size(g,1) - LA offB = -LA else LA = 0 @@ -189,7 +213,7 @@ quote end if offB + size(block,1) > size(g,1) - LB = size(g,1) - (offB ) + LB = size(g,1) - (offB ) offC = -LB else LB = size(block,1) @@ -197,31 +221,31 @@ quote if LB != size(block,1) && offC + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LC = size(g,1) - (offC) + LC = size(g,1) - (offC) offD = -LC else # no out of bounds indices LC = size(block,1) end - + @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:size(block,d+1)) d->begin # preexpr - idx_{d+1} = ( mod(l_{d+1} + off[d+1] + p.Ñ[d+1], p.Ñ[d+1]) + 1) + idx_{d+1} = ( mod(l_{d+1} + off[d+1] + p.Ñ[d+1], p.Ñ[d+1]) + 1) end begin # bodyexpr - @inbounds @simd for l_1 = 1:LA + @inbounds @simd for l_1 in 1:LA idx_1 = l_1 + offA (@nref $D block l) = (@nref $D g idx) end - @inbounds @simd for l_1 = (LA+1):LB + @inbounds @simd for l_1 in (LA+1):LB idx_1 = l_1 + offB (@nref $D block l) = (@nref $D g idx) end - @inbounds @simd for l_1 = (LB+1):LC + @inbounds @simd for l_1 in (LB+1):LC idx_1 = l_1 + offC (@nref $D block l) = (@nref $D g idx) end - @inbounds @simd for l_1 = (LC+1):size(block,1) + @inbounds @simd for l_1 in (LC+1):size(block,1) idx_1 = l_1 + offD (@nref $D block l) = (@nref $D g idx) end @@ -233,41 +257,42 @@ end @noinline function calcOneBlock!(p::NFFTPlan{T,D,1}, fHat, nodesInBlock, block, off, L, scale, idxInBlock, winTensor, winPoly) where {D,T} for (jLocal,j) in enumerate(nodesInBlock) - fHat[j] = calcOneNode!(p, block, off, L, scale, j, jLocal, + fHat[j] = calcOneNode!(p, block, off, L, scale, j, jLocal, idxInBlock, winTensor, winPoly) end return end -@generated function calcOneNode!(p::NFFTPlan{T,D,1}, block, off, L::Val{Z}, scale, +@generated function calcOneNode!(p::NFFTPlan{T,D,1}, block, off, L::Val{Z}, scale, j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} quote - @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, + @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, d, L, idxInBlock) ) @nexprs 1 d -> prodWin_{$D} = one(T) @nexprs 1 d -> fHat_{$D} = zero(Complex{T}) @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:$Z) d->begin # preexpr - block_idx_{d+1} = off_{d+1} + l_{d+1} + block_idx_{d+1} = off_{d+1} + l_{d+1} fHat_{d} = zero(Complex{T}) end d->begin # postexpr fHat_{d+1} += tmpWin_{d+1}[l_{d+1}] * fHat_{d} end begin # bodyexpr - @inbounds @simd for l_1 = 1:$Z + @inbounds @simd for l_1 in 1:$Z block_idx_1 = off_1 + l_1 fHat_1 += tmpWin_1[l_1] * (@nref $D block block_idx) end end - @nexprs 1 d -> fHat = fHat_{$D} + @nexprs 1 d -> fHat = fHat_{$D} return fHat end end + ########################## function convolve_transpose_blocking!(p::NFFTPlan, fHat, g) @@ -276,11 +301,12 @@ function convolve_transpose_blocking!(p::NFFTPlan, fHat, g) _convolve_transpose_blocking!(p, fHat, g, L, winPoly) end + function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D,T} #g .= zero(T) ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), g, 0, sizeof(g)) scale = Int(p.params.LUTSize/(p.params.m)) - + lk = ReentrantLock() @cthreads for l in CartesianIndices(size(p.blocks)) if !isempty(p.nodesInBlock[l]) @@ -288,9 +314,9 @@ function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), p.blocks[l], 0, sizeof(p.blocks[l])) winTensor = (p.params.precompute == TENSOR) ? p.windowTensor[l] : nothing - fillBlock!(p, fHat, p.blocks[l], p.nodesInBlock[l], p.blockOffsets[l], L, scale, + fillBlock!(p, fHat, p.blocks[l], p.nodesInBlock[l], p.blockOffsets[l], L, scale, p.idxInBlock[l], winTensor, winPoly) - + lock(lk) do addBlock!(p, g, p.blocks[l], p.blockOffsets[l]) end @@ -298,6 +324,7 @@ function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) end end + @generated function addBlock!(p::NFFTPlan{T,D,1}, g, block, off) where {T,D} quote # addBlock! needs to wrap indices. The easies is to apply a modulo operation (rem) @@ -311,8 +338,8 @@ end if off[1] + 2 < 1 # negative indices: first wrapping - LA = -(off[1] + 2) + 1 - offA = size(g,1) - LA + LA = -(off[1] + 2) + 1 + offA = size(g,1) - LA offB = -LA else # no negative indices: skip first sum @@ -322,7 +349,7 @@ end if offB + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LB = size(g,1) - (offB ) + LB = size(g,1) - (offB ) offC = -LB else # no out of bounds indices @@ -331,31 +358,31 @@ end if LB != size(block,1) && offC + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LC = size(g,1) - (offC) + LC = size(g,1) - (offC) offD = -LC else # no out of bounds indices LC = size(block,1) end - + @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:size(block,d+1)) d->begin # preexpr - idx_{d+1} = ( mod(l_{d+1} + off[d+1] + p.Ñ[d+1], p.Ñ[d+1]) + 1) + idx_{d+1} = ( mod(l_{d+1} + off[d+1] + p.Ñ[d+1], p.Ñ[d+1]) + 1) end begin # bodyexpr - @inbounds @simd for l_1 = 1:LA + @inbounds @simd for l_1 in 1:LA idx_1 = l_1 + offA (@nref $D g idx) += (@nref $D block l) end - @inbounds @simd for l_1 = (LA+1):LB + @inbounds @simd for l_1 in (LA+1):LB idx_1 = l_1 + offB (@nref $D g idx) += (@nref $D block l) end - @inbounds @simd for l_1 = (LB+1):LC + @inbounds @simd for l_1 in (LB+1):LC idx_1 = l_1 + offC (@nref $D g idx) += (@nref $D block l) end - @inbounds @simd for l_1 = (LC+1):size(block,1) + @inbounds @simd for l_1 in (LC+1):size(block,1) idx_1 = l_1 + offD (@nref $D g idx) += (@nref $D block l) end @@ -364,10 +391,11 @@ end end end -@noinline function fillBlock!(p::NFFTPlan{T,D,1}, fHat, block, nodesInBlock, off, L::Val{Z}, scale, + +@noinline function fillBlock!(p::NFFTPlan{T,D,1}, fHat, block, nodesInBlock, off, L::Val{Z}, scale, idxInBlock, winTensor, winPoly) where {T,D,Z} if (Threads.nthreads() == 1 || !NFFT._use_threads[]) && - (D >= 3 && Z >= 16) || (D == 2 && Z >= 16) # magic + (D >= 3 && Z >= 16) || (D == 2 && Z >= 16) # magic for (jLocal,j) in enumerate(nodesInBlock) fillOneNode2!(p, fHat, block, off, L, scale, j, jLocal, idxInBlock, winTensor, winPoly) end @@ -380,7 +408,7 @@ end end -@generated function fillOneNode!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, +@generated function fillOneNode!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} quote fHat_ = fHat[j] @@ -394,11 +422,11 @@ end @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:$Z) d->begin # preexpr prodWin_{d} = prodWin_{d+1} * tmpWin_{d+1}[l_{d+1}] - block_idx_{d+1} = off_{d+1} + l_{d+1} + block_idx_{d+1} = off_{d+1} + l_{d+1} end begin # bodyexpr - @inbounds @simd for l_1 = 1:$(Z) - block_idx_1 = off_1 + l_1 + @inbounds @simd for l_1 in 1:$(Z) + block_idx_1 = off_1 + l_1 (@nref $D block block_idx) += innerWin[l_1] * prodWin_1 end end @@ -406,7 +434,8 @@ end end end -@generated function fillOneNode2!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, + +@generated function fillOneNode2!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} quote fHat_ = fHat[j] @@ -418,11 +447,11 @@ end @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:$Z) d->begin # preexpr prodWin_{d} = prodWin_{d+1} * tmpWin_{d+1}[l_{d+1}] - block_idx_{d+1} = off_{d+1} + l_{d+1} + block_idx_{d+1} = off_{d+1} + l_{d+1} end begin # bodyexpr - @inbounds @simd for l_1 = 1:$(Z) - block_idx_1 = off_1 + l_1 + @inbounds @simd for l_1 in 1:$(Z) + block_idx_1 = off_1 + l_1 (@nref $D block block_idx) += (tmpWin_1[l_1] * prodWin_1) * fHat_ end end @@ -431,17 +460,16 @@ end end - -@generated function fillOneNodeReal!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, +@generated function fillOneNodeReal!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} quote fHat_ = fHat[j] - @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, + @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, d, L, idxInBlock) ) innerWin = Vector{Float64}(undef,$(2*Z)) # Probably cache me somewhere - for l=1:$Z + for l in 1:$Z innerWin[2*(l-1)+1] = tmpWin_1[l] * real(fHat_) innerWin[2*(l-1)+2] = tmpWin_1[l] * imag(fHat_) end @@ -450,7 +478,7 @@ end # Convert to real # We use unsafe_wrap here because reinterpret(T, block) is much slower # and reinterpret(T, reshape, block) is slightly slower than unsafe_wrap - # In fact the conversion to real only pays off when using unsafe_wrap, + # In fact the conversion to real only pays off when using unsafe_wrap, # otherwise sticking with complex is faster U = @ntuple $(D) d -> d==1 ? 2*size(block,1) : size(block,d) blockReal = unsafe_wrap(Array,reinterpret(Ptr{T},pointer(block)), U) @@ -460,11 +488,11 @@ end @nloops_ $(D-1) (d->l_{d+1}) (d -> 1:$Z) d->begin # preexpr prodWin_{d} = prodWin_{d+1} * tmpWin_{d+1}[l_{d+1}] - block_idx_{d+1} = off_{d+1} + l_{d+1} + block_idx_{d+1} = off_{d+1} + l_{d+1} end begin # bodyexpr - @inbounds @simd for l_1 = 1:$(2*Z) - block_idx_1 = off_1 + l_1 + @inbounds @simd for l_1 in 1:$(2*Z) + block_idx_1 = off_1 + l_1 (@nref $(D) blockReal block_idx) += innerWin[l_1] * prodWin_1 end end From 52f9e12577f9e89ff99adf7aec99b287159d5128 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Wed, 31 Dec 2025 14:30:59 -0500 Subject: [PATCH 03/10] Refine type declarations --- src/convolution.jl | 141 ++++++++++++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 54 deletions(-) diff --git a/src/convolution.jl b/src/convolution.jl index 9c391302..2c4210c5 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -10,15 +10,20 @@ because sampling density compensation vectors are real. #= using NFFT: NFFTPlan, @cthreads, @nloops_ using LinearAlgebra: mul! +import Base.Cartesian: @nexprs, @nref import AbstractNFFTs =# +const RealOrComplex = Union{T, Complex{T}} where {T <: Real} function AbstractNFFTs.convolve!( - p::NFFTPlan{Tp,D,1}, - g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, - fHat::StridedVector{U}, -) where {D, Tp <: Real, Tg <: Real, U} + p::NFFTPlan{<:Real,D,1}, + g::AbstractArray{<:RealOrComplex, D}, + fHat::StridedVector{<:RealOrComplex}, # mutated +) where {D} + + (eltype(g) <: Complex) && (eltype(fHat) <: Real) && + throw(ArgumentError("Complex input g requires Complex output fHat")) if isempty(p.B) if p.params.blocking @@ -41,7 +46,7 @@ end function _convolve_nonblocking!(p::NFFTPlan, g, fHat, L, winPoly) - scale = Int(p.params.LUTSize/(p.params.m)) + scale = Int(p.params.LUTSize / p.params.m) @cthreads for j in 1:p.J fHat[j] = _convolve_nonblocking(p, g, L, winPoly, scale, j) @@ -54,13 +59,13 @@ end g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, L::Val{Z}, winPoly, - scale, - j, + scale::Int, + j::Int, ) where {D, Tp <: Real, Tg <: Real, Z} quote @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, - p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) + p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) T = promote_type(Tp, Tg) fHat = zero(Complex{T}) @@ -80,10 +85,10 @@ end function convolve_sparse_matrix!( - p::NFFTPlan{Tp,D,1}, - g::AbstractArray{<: Union{Tg, Complex{Tg}}, D}, - fHat::StridedVector{U}, -) where {D, Tp <: Real, Tg <: Real, U} + p::NFFTPlan{<:Real,D,1}, + g::AbstractArray{<:RealOrComplex, D}, + fHat::StridedVector{<:RealOrComplex}, +) where {D} threaded_mul!(fHat, transpose(p.B), vec(g)) #mul!(fHat, transpose(p.B), vec(g)) @@ -93,10 +98,13 @@ end ########## convolve adjoint ########## function AbstractNFFTs.convolve_transpose!( - p::NFFTPlan{Tp,D,1}, - fHat::AbstractVector{U}, - g::StridedArray{<: Union{Tg, Complex{Tg}}, D}, -) where {D, Tp <: Real, Tg <: Real, U} + p::NFFTPlan{<:Real,D,1}, + fHat::AbstractVector{<:RealOrComplex}, + g::StridedArray{<:RealOrComplex, D}, # mutated +) where {D} + + (eltype(fHat) <: Complex) && (eltype(g) <: Real) && + throw(ArgumentError("Complex input fHat requires Complex output g")) if isempty(p.B) if p.params.blocking @@ -118,9 +126,16 @@ function convolve_transpose_nonblocking!(p::NFFTPlan, fHat, g) end -function _convolve_transpose_nonblocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D,T} - fill!(g, zero(T)) - scale = Int(p.params.LUTSize/(p.params.m)) +function _convolve_transpose_nonblocking!( + p::NFFTPlan{<:Real,D,1}, + fHat::AbstractVector{<:RealOrComplex}, + g::StridedArray{<: Union{Tg, Complex{Tg}}, D}, + L::Val, + winPoly, +) where {D, Tg <: Real} + + fill!(g, zero(Tg)) + scale = Int(p.params.LUTSize / p.params.m) @inbounds @simd for j in 1:p.J __convolve_transpose_nonblocking!(p, fHat, g, L, winPoly, scale, j) @@ -129,21 +144,20 @@ end @generated function __convolve_transpose_nonblocking!( - p::NFFTPlan{Tp,D,1}, - fHat::AbstractVector{U}, + p::NFFTPlan{<:Real,D,1}, + fHat::AbstractVector{<:RealOrComplex}, g::StridedArray{<: Union{Tg, Complex{Tg}}, D}, L::Val{Z}, winPoly, - scale, - j, -) where {D, Tp <: Real, Tg <: Real, U, Z} + scale::Int, + j::Int, +) where {D, Tg <: Real, Z} quote @nexprs $(D) d -> ((tmpIdx_d, tmpWin_d) = precomputeOneNode(p.windowLinInterp, winPoly, p.k, p.Ñ, - p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) + p.params.m, p.params.σ, scale, j, d, L, p.params.LUTSize) ) - T = promote_type(Tp, Tg) - @nexprs 1 d -> prodWin_{$D} = one(Tp) + @nexprs 1 d -> prodWin_{$D} = one(Tg) @nloops_ $D l d -> 1:$Z d->begin # preexpr prodWin_{d-1} = prodWin_d * tmpWin_d[l_d] @@ -157,10 +171,10 @@ end function convolve_transpose_sparse_matrix!( - p::NFFTPlan{Tp,D,1}, - fHat::AbstractVector{U}, - g::StridedArray{<:Union{Tg, Complex{Tg}}, D}, -) where {D, Tp <: Real, Tg <: Real, U} + p::NFFTPlan{<:Real,D,1}, + fHat::AbstractVector{<:RealOrComplex}, + g::StridedArray{<:RealOrComplex, D}, +) where {D} #threaded_mul!(vec(g), p.B, fHat) mul!(vec(g), p.B, fHat) @@ -188,8 +202,8 @@ function convolve_blocking!(p::NFFTPlan, g, fHat) _convolve_blocking!(p, g, fHat, L, winPoly) end -function _convolve_blocking!(p::NFFTPlan{T,D,1}, g, fHat, L, winPoly) where {D,T} - scale = Int(p.params.LUTSize/(p.params.m)) +function _convolve_blocking!(p::NFFTPlan{<:Real,D,1}, g, fHat, L, winPoly) where {D} + scale = Int(p.params.LUTSize / p.params.m) @cthreads for l in CartesianIndices(size(p.blocks)) if !isempty(p.nodesInBlock[l]) @@ -201,7 +215,7 @@ function _convolve_blocking!(p::NFFTPlan{T,D,1}, g, fHat, L, winPoly) where {D,T end end -@generated function toBlock!(p::NFFTPlan{T,D,1}, g, block, off) where {T,D} +@generated function toBlock!(p::NFFTPlan{<:Real,D,1}, g, block, off) where {D} quote if off[1] + 2 < 1 LA = -(off[1] + 2) + 1 @@ -213,7 +227,7 @@ quote end if offB + size(block,1) > size(g,1) - LB = size(g,1) - (offB ) + LB = size(g,1) - offB offC = -LB else LB = size(block,1) @@ -221,7 +235,7 @@ quote if LB != size(block,1) && offC + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LC = size(g,1) - (offC) + LC = size(g,1) - offC offD = -LC else # no out of bounds indices @@ -254,8 +268,12 @@ quote end end -@noinline function calcOneBlock!(p::NFFTPlan{T,D,1}, fHat, nodesInBlock, block, - off, L, scale, idxInBlock, winTensor, winPoly) where {D,T} +@noinline function calcOneBlock!( + p::NFFTPlan{<:Real,D,1}, + fHat, nodesInBlock, block, + off, L, scale, idxInBlock, winTensor, winPoly, +) where {D} + for (jLocal,j) in enumerate(nodesInBlock) fHat[j] = calcOneNode!(p, block, off, L, scale, j, jLocal, idxInBlock, winTensor, winPoly) @@ -264,8 +282,11 @@ end end -@generated function calcOneNode!(p::NFFTPlan{T,D,1}, block, off, L::Val{Z}, scale, - j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} +@generated function calcOneNode!( + p::NFFTPlan{T,D,1}, block, off, L::Val{Z}, scale, + j, jLocal, idxInBlock, winTensor, winPoly, +) where {D, T <: Real, Z} + quote @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, d, L, idxInBlock) ) @@ -302,10 +323,10 @@ function convolve_transpose_blocking!(p::NFFTPlan, fHat, g) end -function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D,T} +function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) where {D, T <: Real} #g .= zero(T) ccall(:memset, Ptr{Cvoid}, (Ptr{Cvoid}, Cint, Csize_t), g, 0, sizeof(g)) - scale = Int(p.params.LUTSize/(p.params.m)) + scale = Int(p.params.LUTSize / p.params.m) lk = ReentrantLock() @cthreads for l in CartesianIndices(size(p.blocks)) @@ -325,7 +346,7 @@ function _convolve_transpose_blocking!(p::NFFTPlan{T,D,1}, fHat, g, L, winPoly) end -@generated function addBlock!(p::NFFTPlan{T,D,1}, g, block, off) where {T,D} +@generated function addBlock!(p::NFFTPlan{<:Real,D,1}, g, block, off) where {D} quote # addBlock! needs to wrap indices. The easies is to apply a modulo operation (rem) # but doing so for each index is slow. We thus only do this for the outer dimensions @@ -349,7 +370,7 @@ end if offB + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LB = size(g,1) - (offB ) + LB = size(g,1) - offB offC = -LB else # no out of bounds indices @@ -358,7 +379,7 @@ end if LB != size(block,1) && offC + size(block,1) > size(g,1) # out of bounds indices: second (or first) wrapping - LC = size(g,1) - (offC) + LC = size(g,1) - offC offD = -LC else # no out of bounds indices @@ -392,8 +413,11 @@ end end -@noinline function fillBlock!(p::NFFTPlan{T,D,1}, fHat, block, nodesInBlock, off, L::Val{Z}, scale, - idxInBlock, winTensor, winPoly) where {T,D,Z} +@noinline function fillBlock!( + p::NFFTPlan{<:Real,D,1}, fHat, block, nodesInBlock, off, L::Val{Z}, scale, + idxInBlock, winTensor, winPoly, +) where {D, Z} + if (Threads.nthreads() == 1 || !NFFT._use_threads[]) && (D >= 3 && Z >= 16) || (D == 2 && Z >= 16) # magic for (jLocal,j) in enumerate(nodesInBlock) @@ -408,8 +432,11 @@ end end -@generated function fillOneNode!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, - j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} +@generated function fillOneNode!( + p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, + j, jLocal, idxInBlock, winTensor, winPoly, +) where {D, T <: Real, Z} + quote fHat_ = fHat[j] @@ -435,8 +462,11 @@ end end -@generated function fillOneNode2!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, - j, jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} +@generated function fillOneNode2!( + p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, + j, jLocal, idxInBlock, winTensor, winPoly, +) where {D, T <: Real, Z} + quote fHat_ = fHat[j] @@ -460,12 +490,15 @@ end end -@generated function fillOneNodeReal!(p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, - jLocal, idxInBlock, winTensor, winPoly) where {D,T,Z} +@generated function fillOneNodeReal!( + p::NFFTPlan{T,D,1}, fHat, block, off, L::Val{Z}, scale, j, + jLocal, idxInBlock, winTensor, winPoly, +) where {D, T <: Real, Z} + quote fHat_ = fHat[j] - @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, + @nexprs $(D) d -> ((off_d, tmpWin_d) = precomputeOneNodeBlocking(p.windowLinInterp, winTensor, winPoly, scale, jLocal, d, L, idxInBlock) ) innerWin = Vector{Float64}(undef,$(2*Z)) # Probably cache me somewhere From 975df61410a10ce8b66a7369f2957a5ce75fe234 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Wed, 31 Dec 2025 14:33:56 -0500 Subject: [PATCH 04/10] To v0.14.2 --- Project.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 2535b12b..dc228061 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NFFT" uuid = "efe261a4-0d2b-5849-be55-fc731d526b0d" -authors = ["Tobias Knopp "] -version = "0.14.1" +authors = ["Tobias Knopp and contributors"] +version = "0.14.2" [deps] AbstractNFFTs = "7f219486-4aa7-41d6-80a7-e08ef20ceed7" From 0c1640fa2582b9bfe963bdd5a8756ac5be804737 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Thu, 1 Jan 2026 11:38:08 -0500 Subject: [PATCH 05/10] Test convolve! --- test/convolve.jl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 2 files changed, 93 insertions(+) create mode 100644 test/convolve.jl diff --git a/test/convolve.jl b/test/convolve.jl new file mode 100644 index 00000000..0248b60b --- /dev/null +++ b/test/convolve.jl @@ -0,0 +1,92 @@ +#= +Test convolution (interpolation) and its adjoint in isolation + +One must be careful with Float16 output arrays here, +because plan.windowLinInterp has values above 1e8 +that exceed the 6e4 maximum of Float16. + +FFTW does not support Float16 plans as of 2026-01-01. +=# + +using NFFT: plan_nfft +using AbstractNFFTs: convolve!, convolve_transpose! +import NFFT # LINEAR, FULL, TENSOR, POLYNOMIAL +using Test: @test, @testset + +# determine worst precision of two types +function worst(T1::Type{<:Number}, T2::Type{<:Number}) + return promote_type(T1, T2) == T1 ? T2 : T1 +end +worst(a, b, c) = worst(worst(a, b), c) + + +@testset "convolve!" begin + + N = (9, 8) # 9×8 grid of equally spaced sampling points + Tb = BigFloat + fun = N -> @. Tb((0:(N-1)) / N - 0.5) # 1D grid + nodes = hcat([[x; y] for x in fun(N[1]), y in fun(N[2])]...) + + T16 = Complex{Float16} + T32 = Complex{Float32} + T64 = Complex{Float64} + @test worst(T16, T64) == T16 + + J = prod(N) # for f + Ñ = 2 .* N # for g + + for pre in [NFFT.LINEAR, NFFT.FULL, NFFT.TENSOR, NFFT.POLYNOMIAL] + p32 = plan_nfft(Float32.(nodes), N, m = 5, σ = 2.0, precompute=pre) + p64 = plan_nfft(Float64.(nodes), N, m = 5, σ = 2.0, precompute=pre) + + @assert Ñ == p64.Ñ + f16 = Vector{T16}(undef, J) + + g64 = rand(T64, Ñ) + g32 = T32.(g64) + g16 = T16.(g64) + @test g32 ≈ g64 + @test g16 ≈ g64 + + fff = Vector{T64}(undef, J) # ground-truth reference + @test fff == convolve!(p64, g64, fff) + + fmax = maximum(abs, fff) # need to avoid avoid overflow with T16 + + for Tg in (T16, T32, T64), Tf in (T32, T64) + f = Vector{Tf}(undef, J) + @test f == convolve!(p64, Tg.(g64), f) + T = worst(Tg, Tf) + @test f/fmax ≈ T.(fff/fmax) + + @test f == convolve!(p32, Tg.(g64), f) + T = worst(Tg, Tf, T32) + @test f/fmax ≈ T.(fff/fmax) + end + + # Float16 test with careful scaling: + @test f16 == convolve!(p64, g64/fmax, f16) + @test f16 ≈ fff/fmax + @test f16 == convolve!(p32, g64/fmax, f16) + @test f16 ≈ fff/fmax + + end + +end # @testset + +#= + + for pre in [NFFT.LINEAR, NFFT.FULL, NFFT.TENSOR, NFFT.POLYNOMIAL] +@show pre + p = plan_nfft(nodes, N, m = 5, σ = 2.0, precompute=pre) + + # ensure that convolve! works with mixed precisions + for Tf in (Float16, Float64), Tg in (Float16, Float64) + f = rand(Complex{Tf}, p.J) + g = rand(Complex{Tg}, p.N) + @test f == convolve!(p, g, f) + @test g == convolve_transpose!(p, g, f) + end + end +=# + diff --git a/test/runtests.jl b/test/runtests.jl index 1b47482b..6b215d7c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,6 +17,7 @@ arrayTypes = areTypesDefined ? arrayTypes : [JLArray] include("issues.jl") include("accuracy.jl") include("constructors.jl") + include("convolve.jl") include("performance.jl") include("testToeplitz.jl") include("samplingDensity.jl") From 21f0e22bfd1f16766751e9249021026050ea1eb0 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Thu, 1 Jan 2026 12:34:33 -0500 Subject: [PATCH 06/10] size checks --- src/convolution.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/convolution.jl b/src/convolution.jl index 2c4210c5..ea5c67c2 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -22,6 +22,10 @@ function AbstractNFFTs.convolve!( fHat::StridedVector{<:RealOrComplex}, # mutated ) where {D} + size(g) == p.Ñ || + throw(DimensionMismatch("size(g)=$(size(g)) ≠ Ñ = $(p.Ñ)")) + size(fHat) == (p.J,) || + throw(DimensionMismatch("size(fHat)=$(size(fHat)) ≠ J = $(p.J)")) (eltype(g) <: Complex) && (eltype(fHat) <: Real) && throw(ArgumentError("Complex input g requires Complex output fHat")) @@ -103,6 +107,10 @@ function AbstractNFFTs.convolve_transpose!( g::StridedArray{<:RealOrComplex, D}, # mutated ) where {D} + size(g) == p.Ñ || + throw(DimensionMismatch("size(g)=$(size(g)) ≠ Ñ = $(p.Ñ)")) + size(fHat) == (p.J,) || + throw(DimensionMismatch("size(fHat)=$(size(fHat)) ≠ J = $(p.J)")) (eltype(fHat) <: Complex) && (eltype(g) <: Real) && throw(ArgumentError("Complex input fHat requires Complex output g")) From 6294d39b7cc626b0a9d8987d6a732f9ed7fad5d4 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Thu, 1 Jan 2026 12:45:10 -0500 Subject: [PATCH 07/10] Add convolve_transpose tests and real tests --- test/convolve.jl | 158 ++++++++++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 58 deletions(-) diff --git a/test/convolve.jl b/test/convolve.jl index 0248b60b..220809fa 100644 --- a/test/convolve.jl +++ b/test/convolve.jl @@ -11,82 +11,124 @@ FFTW does not support Float16 plans as of 2026-01-01. using NFFT: plan_nfft using AbstractNFFTs: convolve!, convolve_transpose! import NFFT # LINEAR, FULL, TENSOR, POLYNOMIAL -using Test: @test, @testset +using Test: @test, @testset, @test_throws -# determine worst precision of two types +# Determine worst precision of two types +# (Does Julia already have such a method?) function worst(T1::Type{<:Number}, T2::Type{<:Number}) return promote_type(T1, T2) == T1 ? T2 : T1 end worst(a, b, c) = worst(worst(a, b), c) -@testset "convolve!" begin - - N = (9, 8) # 9×8 grid of equally spaced sampling points - Tb = BigFloat - fun = N -> @. Tb((0:(N-1)) / N - 0.5) # 1D grid - nodes = hcat([[x; y] for x in fun(N[1]), y in fun(N[2])]...) - - T16 = Complex{Float16} - T32 = Complex{Float32} - T64 = Complex{Float64} - @test worst(T16, T64) == T16 - - J = prod(N) # for f - Ñ = 2 .* N # for g - - for pre in [NFFT.LINEAR, NFFT.FULL, NFFT.TENSOR, NFFT.POLYNOMIAL] - p32 = plan_nfft(Float32.(nodes), N, m = 5, σ = 2.0, precompute=pre) - p64 = plan_nfft(Float64.(nodes), N, m = 5, σ = 2.0, precompute=pre) +N = (9, 8) # 9×8 grid of equally spaced sampling points +Tb = BigFloat +fun = N -> @. Tb((0:(N-1)) / N - 0.5) # 1D grid +nodes = hcat([[x; y] for x in fun(N[1]), y in fun(N[2])]...) - @assert Ñ == p64.Ñ - f16 = Vector{T16}(undef, J) +F16 = Float16 +F32 = Float32 +F64 = Float64 +C16 = Complex{F16} +C32 = Complex{F32} - g64 = rand(T64, Ñ) - g32 = T32.(g64) - g16 = T16.(g64) - @test g32 ≈ g64 - @test g16 ≈ g64 +@testset "worst" begin + @test worst(F64, F16, F32) == F16 + @test worst(C16, C32) == C16 +end - fff = Vector{T64}(undef, J) # ground-truth reference - @test fff == convolve!(p64, g64, fff) +J = prod(N) # for f +Ñ = 2 .* N # for g - fmax = maximum(abs, fff) # need to avoid avoid overflow with T16 +@testset "convolve-throws" begin + # throw error when input is complex and output is real + p = plan_nfft(Float32.(nodes), N, m = 5, σ = 2.0, precompute=NFFT.LINEAR) + @test_throws ArgumentError convolve!(p, ones(C32, Ñ), zeros(Float32, J)) + @test_throws ArgumentError convolve_transpose!(p, ones(C32, J), zeros(Float32, Ñ)) - for Tg in (T16, T32, T64), Tf in (T32, T64) - f = Vector{Tf}(undef, J) - @test f == convolve!(p64, Tg.(g64), f) - T = worst(Tg, Tf) - @test f/fmax ≈ T.(fff/fmax) + # or when dimensions mismatch + @test_throws DimensionMismatch convolve!(p, ones(C32, Ñ), zeros(C32, 2)) + @test_throws DimensionMismatch convolve!(p, ones(C32, N), zeros(C32, J)) + @test_throws DimensionMismatch convolve_transpose!(p, ones(C32, 2), zeros(C32, Ñ)) + @test_throws DimensionMismatch convolve_transpose!(p, ones(C32, J), zeros(C32, N)) +end - @test f == convolve!(p32, Tg.(g64), f) - T = worst(Tg, Tf, T32) - @test f/fmax ≈ T.(fff/fmax) - end - # Float16 test with careful scaling: - @test f16 == convolve!(p64, g64/fmax, f16) - @test f16 ≈ fff/fmax - @test f16 == convolve!(p32, g64/fmax, f16) - @test f16 ≈ fff/fmax +@testset "convolve!" begin - end + for pre in [NFFT.LINEAR, NFFT.FULL, NFFT.TENSOR, NFFT.POLYNOMIAL] + p32 = plan_nfft(Float32.(nodes), N, m = 5, σ = 2.0, precompute=pre) + p64 = plan_nfft(Float64.(nodes), N, m = 5, σ = 2.0, precompute=pre) + @assert Ñ == p64.Ñ + for tfun in (identity, t -> Complex{t}) # test both Real and Complex + T16 = tfun(F16) + T32 = tfun(F32) + T64 = tfun(F64) + + g64 = rand(T64, Ñ) + fff = Vector{T64}(undef, J) # ground-truth reference + @test fff == convolve!(p64, g64, fff) + + fmax = maximum(abs, fff) # need to avoid avoid overflow with Float16 + + for Tg in (T16, T32, T64), Tf in (T32, T64) + f = Vector{Tf}(undef, J) + @test f == convolve!(p64, Tg.(g64), f) + T = worst(Tg, Tf) + @test f/fmax ≈ T.(fff/fmax) + + @test f == convolve!(p32, Tg.(g64), f) + T = worst(Tg, Tf, T32) + @test f / fmax ≈ T.(fff / fmax) + end + + # Float16 test with careful scaling: + f16 = Vector{T16}(undef, J) + @test f16 == convolve!(p64, g64/fmax, f16) + @test f16 ≈ fff / fmax + @test f16 == convolve!(p32, g64/fmax, f16) + @test f16 ≈ fff / fmax + end # tfun + end # pre end # @testset -#= + +@testset "convolve_transpose!" begin for pre in [NFFT.LINEAR, NFFT.FULL, NFFT.TENSOR, NFFT.POLYNOMIAL] -@show pre - p = plan_nfft(nodes, N, m = 5, σ = 2.0, precompute=pre) - - # ensure that convolve! works with mixed precisions - for Tf in (Float16, Float64), Tg in (Float16, Float64) - f = rand(Complex{Tf}, p.J) - g = rand(Complex{Tg}, p.N) - @test f == convolve!(p, g, f) - @test g == convolve_transpose!(p, g, f) - end - end -=# + p32 = plan_nfft(Float32.(nodes), N, m = 5, σ = 2.0, precompute=pre) + p64 = plan_nfft(Float64.(nodes), N, m = 5, σ = 2.0, precompute=pre) + @assert Ñ == p64.Ñ + for tfun in (identity, t -> Complex{t}) # test both Real and Complex + T16 = tfun(F16) + T32 = tfun(F32) + T64 = tfun(F64) + + f64 = rand(T64, J) + ggg = Array{T64}(undef, Ñ) # ground-truth reference + @test ggg == convolve_transpose!(p64, f64, ggg) + + gmax = maximum(abs, ggg) # need to avoid avoid overflow with Float16 + + for Tg in (T32, T64), Tf in (T16, T32, T64) + g = similar(ggg, Tg) + @test g == convolve_transpose!(p64, Tf.(f64), g) + T = worst(Tg, Tf) + @test g / gmax ≈ T.(ggg / gmax) + + @test g == convolve_transpose!(p32, Tf.(f64), g) + T = worst(Tg, Tf, T32) + @test g / gmax ≈ T.(ggg / gmax) + end + + # Float16 test with careful scaling: + g16 = Array{T16}(undef, Ñ) + @test g16 == convolve_transpose!(p64, f64/gmax, g16) + @test g16 ≈ ggg / gmax + @test g16 == convolve_transpose!(p32, f64/gmax, g16) + @test g16 ≈ ggg / gmax + end # pre + end # tfun +end # @testset From de750c59b9d05a655a6120828dcedfa38cf01a98 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Thu, 1 Jan 2026 12:51:14 -0500 Subject: [PATCH 08/10] =?UTF-8?q?Fix=20size=20to=20=C3=91=20in=20comment?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AbstractNFFTs/src/interface.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AbstractNFFTs/src/interface.jl b/AbstractNFFTs/src/interface.jl index fd8c7913..6f10b7cd 100644 --- a/AbstractNFFTs/src/interface.jl +++ b/AbstractNFFTs/src/interface.jl @@ -225,7 +225,7 @@ Overwrite `R`-dimensional array `fHat` (often R=1, and `fHat` has length `p.J`) with the result of "convolution" (i.e., interpolation) between `D`-dimensional equispaced input array `g` -(often of size `p.N`) +(often of size `p.Ñ`) and the interpolation kernel in NFFT plan `p`. """ @mustimplement convolve!(p::AbstractNFFTPlan, g::AbstractArray, fHat::AbstractArray) From 18e76ec5ca7e9622d82f780fd430895ea1d76efd Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Fri, 2 Jan 2026 09:34:12 -0500 Subject: [PATCH 09/10] Multiple dispatch for complex/real mismatch --- src/convolution.jl | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/convolution.jl b/src/convolution.jl index ea5c67c2..c771a54e 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -5,6 +5,7 @@ These are core NFFT operations. The methods allow both real and complex types because sampling density compensation vectors are real. +However, a Complex input with a Real output is verboten. =# #= @@ -26,8 +27,10 @@ function AbstractNFFTs.convolve!( throw(DimensionMismatch("size(g)=$(size(g)) ≠ Ñ = $(p.Ñ)")) size(fHat) == (p.J,) || throw(DimensionMismatch("size(fHat)=$(size(fHat)) ≠ J = $(p.J)")) - (eltype(g) <: Complex) && (eltype(fHat) <: Real) && - throw(ArgumentError("Complex input g requires Complex output fHat")) + +# multiple dispatch (next function) obviates need for this test +# (eltype(g) <: Complex) && (eltype(fHat) <: Real) && +# throw(ArgumentError("Complex input g requires Complex output fHat")) if isempty(p.B) if p.params.blocking @@ -41,6 +44,14 @@ function AbstractNFFTs.convolve!( return fHat end +function AbstractNFFTs.convolve!( + ::NFFTPlan{<:Real,D,1}, + ::AbstractArray{<:Complex, D}, + ::StridedVector{<:Real}, +) where {D} + throw(ArgumentError("Complex input g requires Complex output fHat")) +end + function convolve_nonblocking!(p::NFFTPlan, g, fHat) L = Val(2*p.params.m) @@ -111,8 +122,10 @@ function AbstractNFFTs.convolve_transpose!( throw(DimensionMismatch("size(g)=$(size(g)) ≠ Ñ = $(p.Ñ)")) size(fHat) == (p.J,) || throw(DimensionMismatch("size(fHat)=$(size(fHat)) ≠ J = $(p.J)")) - (eltype(fHat) <: Complex) && (eltype(g) <: Real) && - throw(ArgumentError("Complex input fHat requires Complex output g")) + +# multiple dispatch (next function) obviates need for this test +# (eltype(fHat) <: Complex) && (eltype(g) <: Real) && +# throw(ArgumentError("Complex input fHat requires Complex output g")) if isempty(p.B) if p.params.blocking @@ -127,6 +140,15 @@ function AbstractNFFTs.convolve_transpose!( end +function AbstractNFFTs.convolve_transpose!( + ::NFFTPlan{<:Real,D,1}, + ::StridedVector{<:Real}, + ::AbstractArray{<:Complex, D}, +) where {D} + throw(ArgumentError("Complex input fHat requires Complex output g")) +end + + function convolve_transpose_nonblocking!(p::NFFTPlan, fHat, g) L = Val(2*p.params.m) winPoly = makePolyArrayStatic(p) From 47a8c92bc1fc4251b4f2f8834ee90a1b570a8c44 Mon Sep 17 00:00:00 2001 From: Jeff Fessler Date: Fri, 2 Jan 2026 10:33:45 -0500 Subject: [PATCH 10/10] Fix --- src/convolution.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/convolution.jl b/src/convolution.jl index c771a54e..7106bb24 100644 --- a/src/convolution.jl +++ b/src/convolution.jl @@ -142,8 +142,8 @@ end function AbstractNFFTs.convolve_transpose!( ::NFFTPlan{<:Real,D,1}, - ::StridedVector{<:Real}, - ::AbstractArray{<:Complex, D}, + ::AbstractVector{<:Complex}, + ::StridedArray{<:Real, D}, ) where {D} throw(ArgumentError("Complex input fHat requires Complex output g")) end