diff --git a/src/memory.jl b/src/memory.jl
index 32f73ffee..42e786614 100644
--- a/src/memory.jl
+++ b/src/memory.jl
@@ -26,6 +26,12 @@ Base.:(+)(x::Integer, y::MtlPtr{T}) where {T} = y + x
 
 Base.convert(::Type{Ptr{T}}, ptr::MtlPtr) where {T} = convert(Ptr{T}, ptr.buffer) + ptr.offset
 
+# expose the GPU virtual address (not the CPU-side `contents` pointer) so that
+# alignment checks like `UInt(ptr) % N == 0` work as for a regular Ptr. `Int`
+# reinterprets the bits like `Int(::Ptr)` does, so high addresses never throw.
+Base.UInt(ptr::MtlPtr) = UInt(ptr.buffer.gpuAddress) + ptr.offset
+Base.Int(ptr::MtlPtr) = UInt(ptr) % Int
+
 
 ## operations
 
diff --git a/test/array.jl b/test/array.jl
index b5512ce62..ee96740f2 100644
--- a/test/array.jl
+++ b/test/array.jl
@@ -22,6 +22,22 @@ end
     @test (pointer(xs2) + 3) == (3 + pointer(xs2))
     @test (pointer(xs2) + 3) - 3 == pointer(xs2)
 
+    # GPU address exposed via UInt/Int, so `UInt(ptr) % N == 0` alignment
+    # checks work the same as for a regular Ptr. value must track pointer
+    # arithmetic on the offset.
+    for SM in STORAGEMODES
+        ys = MtlArray{Int8,1,SM}(undef, 128)
+        p = pointer(ys)
+        @test p isa Metal.MtlPtr
+        @test UInt(p) isa UInt
+        @test Int(p) == UInt(p) % Int
+        @test UInt(p) == UInt(ys.data[].gpuAddress) + ys.offset
+        @test UInt(p + 7) == UInt(p) + 7
+        # fresh allocations should be aligned enough for typical SIMD use
+        @test UInt(p) % 16 == 0
+        @test UInt(p + 16) % 16 == 0
+    end
+
     @test collect(MtlArray([1 2; 3 4])) == [1 2; 3 4]
     @test collect(mtl([1, 2, 3])) == [1, 2, 3]
     @test testf(vec, rand(Float32, 5,3))