diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 2bb038a39..3d086fe8d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -6,6 +6,11 @@ on: push: branches: - master + types: + - opened + - reopened + - synchronize + - ready_for_review tags: '*' schedule: - cron: '00 04 * * 1' # 4am every Monday @@ -14,6 +19,7 @@ jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} + if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }} strategy: fail-fast: false matrix: @@ -49,6 +55,7 @@ jobs: docs: name: Documentation runs-on: ubuntu-latest + if: ${{ github.event_name == 'push' || !github.event.pull_request.draft }} steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 diff --git a/Project.toml b/Project.toml index 8d4add60a..6994e0bdb 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ authors = ["Joe G Greener "] version = "0.13.0" [deps] +AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a" BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" @@ -32,6 +33,7 @@ UnitfulChainRules = "f31437dd-25a7-4345-875f-756556e6935d" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [compat] +AMDGPU = "0.4" AtomsBase = "0.2" BioStructures = "1" CUDA = "3" diff --git a/src/Molly.jl b/src/Molly.jl index 2ab59c2e1..a59899303 100644 --- a/src/Molly.jl +++ b/src/Molly.jl @@ -7,6 +7,15 @@ import Chemfiles using Colors using Combinatorics using CUDA +if has_cuda_gpu() + CUDA.allowscalar(false) +end + +using AMDGPU +if has_rocm_gpu() + AMDGPU.allowscalar(false) +end + using DataStructures using Distances using Distributions diff --git a/src/chain_rules.jl b/src/chain_rules.jl index 2cec54fc5..263c043cd 100644 --- a/src/chain_rules.jl +++ b/src/chain_rules.jl @@ -109,7 +109,7 @@ function ChainRulesCore.rrule(::typeof(unsafe_getindex), arr, 
inds) end # Not faster on CPU -function ChainRulesCore.rrule(::typeof(getindices_i), arr::CuArray, neighbors) +function ChainRulesCore.rrule(::typeof(getindices_i), arr::AT, neighbors) where AT <: Union{CuArray, ROCArray} Y = getindices_i(arr, neighbors) @views @inbounds function getindices_i_pullback(Ȳ) return NoTangent(), accumulate_bounds(Ȳ, neighbors.atom_bounds_i), nothing @@ -117,7 +117,7 @@ function ChainRulesCore.rrule(::typeof(getindices_i), arr::CuArray, neighbors) return Y, getindices_i_pullback end -function ChainRulesCore.rrule(::typeof(getindices_j), arr::CuArray, neighbors) +function ChainRulesCore.rrule(::typeof(getindices_j), arr::AT, neighbors) where AT <: Union{CuArray, ROCArray} Y = getindices_j(arr, neighbors) @views @inbounds function getindices_j_pullback(Ȳ) return NoTangent(), accumulate_bounds(Ȳ[neighbors.sortperm_j], neighbors.atom_bounds_j), nothing diff --git a/src/gradients.jl b/src/gradients.jl index 5faf4f01f..798cd7f9a 100644 --- a/src/gradients.jl +++ b/src/gradients.jl @@ -88,9 +88,10 @@ Allows gradients for individual parameters to be tracked. Returns atoms, pairwise interactions, specific interaction lists and general interactions. 
""" -function inject_gradients(sys, params_dic, gpu::Bool=isa(sys.coords, CuArray)) +function inject_gradients(sys, params_dic; AT = find_array_type(sys.coords), + gpu::Bool = (AT <: Union{CuArray, ROCArray})) if gpu - atoms_grad = CuArray(inject_atom.(Array(sys.atoms), sys.atoms_data, (params_dic,))) + atoms_grad = AT(inject_atom.(Array(sys.atoms), sys.atoms_data, (params_dic,))) else atoms_grad = inject_atom.(sys.atoms, sys.atoms_data, (params_dic,)) end @@ -100,7 +101,7 @@ function inject_gradients(sys, params_dic, gpu::Bool=isa(sys.coords, CuArray)) pis_grad = sys.pairwise_inters end if length(sys.specific_inter_lists) > 0 - sis_grad = inject_interaction_list.(sys.specific_inter_lists, (params_dic,), gpu) + sis_grad = inject_interaction_list.(sys.specific_inter_lists, (params_dic,), gpu, AT) else sis_grad = sys.specific_inter_lists end @@ -127,36 +128,40 @@ function inject_atom(at, at_data, params_dic) ) end -function inject_interaction_list(inter::InteractionList1Atoms, params_dic, gpu) +function inject_interaction_list(inter::InteractionList1Atoms, params_dic, gpu, + AT) if gpu - inters_grad = CuArray(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) + inters_grad = AT(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) else inters_grad = inject_interaction.(inter.inters, inter.types, (params_dic,)) end InteractionList1Atoms(inter.is, inter.types, inters_grad) end -function inject_interaction_list(inter::InteractionList2Atoms, params_dic, gpu) +function inject_interaction_list(inter::InteractionList2Atoms, params_dic, gpu, + AT) if gpu - inters_grad = CuArray(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) + inters_grad = AT(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) else inters_grad = inject_interaction.(inter.inters, inter.types, (params_dic,)) end InteractionList2Atoms(inter.is, inter.js, inter.types, inters_grad) end -function 
inject_interaction_list(inter::InteractionList3Atoms, params_dic, gpu) +function inject_interaction_list(inter::InteractionList3Atoms, params_dic, gpu, + AT) if gpu - inters_grad = CuArray(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) + inters_grad = AT(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) else inters_grad = inject_interaction.(inter.inters, inter.types, (params_dic,)) end InteractionList3Atoms(inter.is, inter.js, inter.ks, inter.types, inters_grad) end -function inject_interaction_list(inter::InteractionList4Atoms, params_dic, gpu) +function inject_interaction_list(inter::InteractionList4Atoms, params_dic, gpu, + AT) if gpu - inters_grad = CuArray(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) + inters_grad = AT(inject_interaction.(Array(inter.inters), inter.types, (params_dic,))) else inters_grad = inject_interaction.(inter.inters, inter.types, (params_dic,)) end diff --git a/src/interactions/implicit_solvent.jl b/src/interactions/implicit_solvent.jl index a26356488..49e44fea8 100644 --- a/src/interactions/implicit_solvent.jl +++ b/src/interactions/implicit_solvent.jl @@ -410,6 +410,10 @@ function ImplicitSolventOBC(atoms::AbstractArray{Atom{T, M, D, E}}, or = CuArray(offset_radii) sor = CuArray(scaled_offset_radii) is, js = CuArray(inds_i), CuArray(inds_j) + elseif isa(atoms, ROCArray) + or = ROCArray(offset_radii) + sor = ROCArray(scaled_offset_radii) + is, js = ROCArray(inds_i), ROCArray(inds_j) else or = offset_radii sor = scaled_offset_radii @@ -555,6 +559,12 @@ function ImplicitSolventGBN2(atoms::AbstractArray{Atom{T, M, D, E}}, is, js = CuArray(inds_i), CuArray(inds_j) d0s, m0s = CuArray(table_d0), CuArray(table_m0) αs, βs, γs = CuArray(αs_cpu), CuArray(βs_cpu), CuArray(γs_cpu) + elseif isa(atoms, ROCArray) + or = ROCArray(offset_radii) + sor = ROCArray(scaled_offset_radii) + is, js = ROCArray(inds_i), ROCArray(inds_j) + d0s, m0s = ROCArray(table_d0), ROCArray(table_m0) + 
αs, βs, γs = ROCArray(αs_cpu), ROCArray(βs_cpu), ROCArray(γs_cpu) else or = offset_radii sor = scaled_offset_radii diff --git a/src/neighbors.jl b/src/neighbors.jl index bcf6d9124..3233bd8cb 100644 --- a/src/neighbors.jl +++ b/src/neighbors.jl @@ -114,6 +114,10 @@ function DistanceVecNeighborFinder(; is = CuArray(hcat([collect(1:n_atoms) for i in 1:n_atoms]...)) js = CuArray(permutedims(is, (2, 1))) m14 = CuArray(matrix_14) + elseif isa(nb_matrix, ROCArray) + is = ROCArray(hcat([collect(1:n_atoms) for i in 1:n_atoms]...)) + js = ROCArray(permutedims(is, (2, 1))) + m14 = ROCArray(matrix_14) else is = hcat([collect(1:n_atoms) for i in 1:n_atoms]...) js = permutedims(is, (2, 1)) diff --git a/src/setup.jl b/src/setup.jl index 3ed9ea0f2..786cc3c32 100644 --- a/src/setup.jl +++ b/src/setup.jl @@ -13,6 +13,24 @@ export is_heavy_atom, add_position_restraints +# Pick the default array type (AT); gpu=true with no detected GPU falls back to CuArray as before +function configure_array_type(gpu) + if !gpu + AT = Array + elseif has_rocm_gpu() && has_cuda_gpu() + @warn("Both AMD and NVIDIA gpus available!\n"* + "Defaulting to CuArray...\n"* + "If you would like to use your AMD GPU, please specify " * + "System(...; AT = ROCArray)") + AT = CuArray + elseif has_rocm_gpu() + AT = ROCArray + else + AT = CuArray + end + return AT +end + """ place_atoms(n_atoms, boundary; min_dist=nothing, max_attempts=100) @@ -372,7 +390,8 @@ function System(coord_file::AbstractString, implicit_solvent=nothing, center_coords::Bool=true, rename_terminal_res::Bool=true, - kappa=0.0u"nm^-1") + kappa=0.0u"nm^-1", + AT = configure_array_type(gpu)) T = typeof(force_field.weight_14_coulomb) # Chemfiles uses zero-based indexing, be careful @@ -721,26 +740,25 @@ function System(coord_file::AbstractString, specific_inter_array = [] if length(bonds.is) > 0 push!(specific_inter_array, InteractionList2Atoms( - bonds.is, bonds.js, bonds.types, - gpu ? 
CuArray([bonds.inters...]) : [bonds.inters...], + bonds.is, bonds.js, bonds.types, AT([bonds.inters...]), )) end if length(angles.is) > 0 push!(specific_inter_array, InteractionList3Atoms( angles.is, angles.js, angles.ks, angles.types, - gpu ? CuArray([angles.inters...]) : [angles.inters...], + AT([angles.inters...]), )) end if length(torsions.is) > 0 push!(specific_inter_array, InteractionList4Atoms( torsions.is, torsions.js, torsions.ks, torsions.ls, torsions.types, - gpu ? CuArray(torsion_inters_pad) : torsion_inters_pad, + AT(torsion_inters_pad), )) end if length(impropers.is) > 0 push!(specific_inter_array, InteractionList4Atoms( impropers.is, impropers.js, impropers.ks, impropers.ls, impropers.types, - gpu ? CuArray(improper_inters_pad) : improper_inters_pad, + AT(improper_inters_pad), )) end specific_inter_lists = tuple(specific_inter_array...) @@ -771,8 +789,8 @@ function System(coord_file::AbstractString, atoms = [atoms...] if gpu_diff_safe neighbor_finder = DistanceVecNeighborFinder( - nb_matrix=gpu ? CuArray(nb_matrix) : nb_matrix, - matrix_14=gpu ? CuArray(matrix_14) : matrix_14, + nb_matrix=AT(nb_matrix), + matrix_14=AT(matrix_14), n_steps=10, dist_cutoff=T(dist_neighbors), ) @@ -787,8 +805,8 @@ function System(coord_file::AbstractString, ) end if gpu - atoms = CuArray(atoms) - coords = CuArray(coords) + atoms = AT(atoms) + coords = AT(coords) end if isnothing(velocities) @@ -845,7 +863,9 @@ function System(T::Type, gpu_diff_safe::Bool=gpu, dist_cutoff=units ? 1.0u"nm" : 1.0, dist_neighbors=units ? 1.2u"nm" : 1.2, - center_coords::Bool=true) + center_coords::Bool=true, + AT = configure_array_type(gpu)) + # Read force field and topology file atomtypes = Dict{String, Atom}() bondtypes = Dict{String, HarmonicBond}() @@ -1108,20 +1128,19 @@ function System(T::Type, specific_inter_array = [] if length(bonds.is) > 0 push!(specific_inter_array, InteractionList2Atoms( - bonds.is, bonds.js, bonds.types, - gpu ? 
CuArray([bonds.inters...]) : [bonds.inters...], + bonds.is, bonds.js, bonds.types, AT([bonds.inters...]), )) end if length(angles.is) > 0 push!(specific_inter_array, InteractionList3Atoms( angles.is, angles.js, angles.ks, angles.types, - gpu ? CuArray([angles.inters...]) : [angles.inters...], + AT([angles.inters...]), )) end if length(torsions.is) > 0 push!(specific_inter_array, InteractionList4Atoms( torsions.is, torsions.js, torsions.ks, torsions.ls, torsions.types, - gpu ? CuArray([torsions.inters...]) : [torsions.inters...], + AT([torsions.inters...]), )) end specific_inter_lists = tuple(specific_inter_array...) @@ -1130,8 +1149,8 @@ function System(T::Type, if gpu_diff_safe neighbor_finder = DistanceVecNeighborFinder( - nb_matrix=gpu ? CuArray(nb_matrix) : nb_matrix, - matrix_14=gpu ? CuArray(matrix_14) : matrix_14, + nb_matrix=AT(nb_matrix), + matrix_14=AT(matrix_14), n_steps=10, dist_cutoff=T(dist_neighbors), ) @@ -1146,8 +1165,8 @@ function System(T::Type, ) end if gpu - atoms = CuArray(atoms) - coords = CuArray(coords) + atoms = AT(atoms) + coords = AT(coords) end if isnothing(velocities) diff --git a/src/types.jl b/src/types.jl index b90c7493f..436930105 100644 --- a/src/types.jl +++ b/src/types.jl @@ -299,7 +299,8 @@ interface described there. - `k::K=Unitful.k`: the Boltzmann constant, which may be modified in some simulations. - `gpu_diff_safe::Bool`: whether to use the code path suitable for the - GPU and taking gradients. Defaults to `isa(coords, CuArray)`. + GPU and taking gradients. Defaults to + `isa(coords, AT) where AT <: Union{CuArray, ROCArray}`. 
""" mutable struct System{D, G, T, CU, A, AD, PI, SI, GI, CN, C, V, B, NF, L, F, E, K} <: AbstractSystem{D} atoms::A @@ -318,6 +319,10 @@ mutable struct System{D, G, T, CU, A, AD, PI, SI, GI, CN, C, V, B, NF, L, F, E, k::K end +function find_array_type(a) + return typeof(a).name.wrapper +end + function System(; atoms, atoms_data=[], @@ -333,11 +338,12 @@ function System(; force_units=u"kJ * mol^-1 * nm^-1", energy_units=u"kJ * mol^-1", k=Unitful.k, - gpu_diff_safe=isa(coords, CuArray)) + AT=find_array_type(coords), + gpu_diff_safe=(AT <: Union{CuArray, ROCArray})) D = n_dimensions(boundary) G = gpu_diff_safe T = float_type(boundary) - CU = isa(coords, CuArray) + CU = AT <: Union{CuArray, ROCArray} A = typeof(atoms) AD = typeof(atoms_data) PI = typeof(pairwise_inters) @@ -372,26 +378,26 @@ function System(; throw(ArgumentError("There are $(length(atoms)) atoms but $(length(atoms_data)) atom data entries")) end - if isa(atoms, CuArray) && !isa(coords, CuArray) - throw(ArgumentError("The atoms are on the GPU but the coordinates are not")) + if isa(atoms, AT) && !isa(coords, AT) + throw(ArgumentError("The atoms and coordinates are on different devices!")) end - if isa(coords, CuArray) && !isa(atoms, CuArray) - throw(ArgumentError("The coordinates are on the GPU but the atoms are not")) + if isa(coords, AT) && !isa(atoms, AT) + throw(ArgumentError("The coordinates and atoms are on different devices!")) end - if isa(atoms, CuArray) && !isa(vels, CuArray) - throw(ArgumentError("The atoms are on the GPU but the velocities are not")) + if isa(atoms, AT) && !isa(vels, AT) + throw(ArgumentError("The atoms and velocities are on different devices!")) end - if isa(vels, CuArray) && !isa(atoms, CuArray) - throw(ArgumentError("The velocities are on the GPU but the atoms are not")) + if isa(vels, AT) && !isa(atoms, AT) + throw(ArgumentError("The velocities and atoms are on different devices!")) end k_converted = convert_k_units(T, k, energy_units) K = typeof(k_converted) return 
System{D, G, T, CU, A, AD, PI, SI, GI, CN, C, V, B, NF, L, F, E, K}( - atoms, atoms_data, pairwise_inters, specific_inter_lists, - general_inters, constraints, coords, vels, boundary, neighbor_finder, - loggers, force_units, energy_units, k_converted) + atoms, atoms_data, pairwise_inters, specific_inter_lists, + general_inters, constraints, coords, vels, boundary, neighbor_finder, + loggers, force_units, energy_units, k_converted) end """ @@ -456,7 +462,8 @@ of replicas and the neighbor finder should be set up to be same. This can be don - `k::K=Unitful.k`: the Boltzmann constant, which may be modified in some simulations. - `gpu_diff_safe::Bool`: whether to use the code path suitable for the - GPU and taking gradients. Defaults to `isa(replica_coords[1], CuArray)`. + GPU and taking gradients. Defaults to + `isa(replica_coords[1], AT) where AT <: Union{CuArray, ROCArray}`. """ mutable struct ReplicaSystem{D, G, T, CU, A, AD, RS, B, EL, F, E, K} <: AbstractSystem{D} atoms::A @@ -491,11 +498,12 @@ function ReplicaSystem(; force_units=u"kJ * mol^-1 * nm^-1", energy_units=u"kJ * mol^-1", k=Unitful.k, - gpu_diff_safe=isa(replica_coords[1], CuArray)) + AT=find_array_type(replica_coords[1]), + gpu_diff_safe = (AT <: Union{CuArray, ROCArray})) D = n_dimensions(boundary) G = gpu_diff_safe T = float_type(boundary) - CU = isa(replica_coords[1], CuArray) + CU = AT <: Union{CuArray, ROCArray} A = typeof(atoms) AD = typeof(atoms_data) C = typeof(replica_coords[1]) @@ -585,26 +593,26 @@ function ReplicaSystem(; throw(ArgumentError("There are $(length(atoms)) atoms but $(length(atoms_data)) atom data entries")) end - n_cuarray = sum(y -> isa(y, CuArray), replica_coords) + n_cuarray = sum(y -> isa(y, AT), replica_coords) if !(n_cuarray == n_replicas || n_cuarray == 0) - throw(ArgumentError("The coordinates for $n_cuarray out of $n_replicas replicas are on GPU")) + throw(ArgumentError("The coordinates for $n_cuarray out of $n_replicas replicas are on a different device!")) end - if 
isa(atoms, CuArray) && n_cuarray != n_replicas - throw(ArgumentError("The atoms are on the GPU but the coordinates are not")) + if isa(atoms, AT) && n_cuarray != n_replicas + throw(ArgumentError("The atoms and coordinates are on different devices!")) end - if n_cuarray == n_replicas && !isa(atoms, CuArray) - throw(ArgumentError("The coordinates are on the GPU but the atoms are not")) + if n_cuarray == n_replicas && !isa(atoms, AT) + throw(ArgumentError("The coordinates and atoms are on different devices!")) end - n_cuarray = sum(y -> isa(y, CuArray), replica_velocities) + n_cuarray = sum(y -> isa(y, AT), replica_velocities) if !(n_cuarray == n_replicas || n_cuarray == 0) - throw(ArgumentError("The velocities for $n_cuarray out of $n_replicas replicas are on GPU")) + throw(ArgumentError("The velocities for $n_cuarray out of $n_replicas replicas are on a different device!")) end - if isa(atoms, CuArray) && n_cuarray != n_replicas - throw(ArgumentError("The atoms are on the GPU but the velocities are not")) + if isa(atoms, AT) && n_cuarray != n_replicas + throw(ArgumentError("The atoms and velocities are on different devices!")) end - if n_cuarray == n_replicas && !isa(atoms, CuArray) - throw(ArgumentError("The velocities are on the GPU but the atoms are not")) + if n_cuarray == n_replicas && !isa(atoms, AT) + throw(ArgumentError("The velocities and atoms are on different devices!")) end k_converted = convert_k_units(T, k, energy_units) @@ -654,7 +662,9 @@ masses(s::Union{System, ReplicaSystem}) = mass.(s.atoms) # Move an array to the GPU depending on whether the system is on the GPU move_array(arr, ::System{D, G, T, false}) where {D, G, T} = arr -move_array(arr, ::System{D, G, T, true }) where {D, G, T} = CuArray(arr) +function move_array(arr, sys::System{D, G, T, true }) where {D, G, T} + find_array_type(sys.coords)(arr) +end AtomsBase.species_type(s::Union{System, ReplicaSystem}) = eltype(s.atoms) diff --git a/src/zygote.jl b/src/zygote.jl index 
ae2a14f70..33adbf1d1 100644 --- a/src/zygote.jl +++ b/src/zygote.jl @@ -10,6 +10,8 @@ Zygote.accum(x::AbstractArray{<:SVector}, ys::AbstractArray{<:SizedVector}...) = Zygote.accum(x::Vector{<:SVector} , y::CuArray{<:SVector}) = Zygote.accum(CuArray(x), y) Zygote.accum(x::CuArray{<:SVector}, y::Vector{<:SVector} ) = Zygote.accum(x, CuArray(y)) +Zygote.accum(x::Vector{<:SVector} , y::ROCArray{<:SVector}) = Zygote.accum(ROCArray(x), y) +Zygote.accum(x::ROCArray{<:SVector}, y::Vector{<:SVector} ) = Zygote.accum(x, ROCArray(y)) Zygote.accum(x::SVector{D, T}, y::T) where {D, T} = x .+ y @@ -136,12 +138,13 @@ end # Slower version than in Zygote but doesn't give wrong gradients on the GPU for repeated indices # Here we just move it to the CPU then move it back # See https://github.com/FluxML/Zygote.jl/pull/1131 -Zygote.∇getindex(x::CuArray, inds::Tuple{AbstractArray{<:Integer}}) = dy -> begin +Zygote.∇getindex(x::Union{CuArray, ROCArray}, inds::Tuple{AbstractArray{<:Integer}}) = dy -> begin inds1_cpu = Array(inds[1]) dx = zeros(eltype(dy), length(x)) dxv = view(dx, inds1_cpu) dxv .= Zygote.accum.(dxv, Zygote._droplike(Array(dy), dxv)) - return Zygote._project(x, CuArray(dx)), nothing + AT = find_array_type(x) + return Zygote._project(x, AT(dx)), nothing end # Extend to add extra empty partials before (B) and after (A) the SVector partials @@ -163,15 +166,16 @@ end sized_to_static(v::SizedVector{3, T, Vector{T}}) where {T} = SVector{3, T}(v[1], v[2], v[3]) sized_to_static(v::SizedVector{2, T, Vector{T}}) where {T} = SVector{2, T}(v[1], v[2]) -function modify_grad(ȳ_in::AbstractArray{SizedVector{D, T, Vector{T}}}, arg::CuArray) where {D, T} - CuArray(sized_to_static.(ȳ_in)) +function modify_grad(ȳ_in::AbstractArray{SizedVector{D, T, Vector{T}}}, arg::Union{CuArray, ROCArray}) where {D, T} + AT = find_array_type(arg) + AT(sized_to_static.(ȳ_in)) end function modify_grad(ȳ_in::AbstractArray{SizedVector{D, T, Vector{T}}}, arg) where {D, T} sized_to_static.(ȳ_in) end 
-modify_grad(ȳ_in, arg::CuArray) = CuArray(ȳ_in) +modify_grad(ȳ_in, arg::AT) where AT <: Union{CuArray, ROCArray} = find_array_type(arg)(ȳ_in) modify_grad(ȳ_in, arg) = ȳ_in # Dualize a value with extra partials diff --git a/test/basic.jl b/test/basic.jl index d89c78e47..be55ef96b 100644 --- a/test/basic.jl +++ b/test/basic.jl @@ -189,9 +189,12 @@ end coords_1 = SVector{3, Float64}.(eachcol(cm_1)) / 10 * u"nm" coords_2 = SVector{3, Float64}.(eachcol(cm_2)) / 10 * u"nm" @test rmsd(coords_1, coords_2) ≈ 2.54859467758795u"Å" - if run_gpu_tests + if run_cuda_tests @test rmsd(CuArray(coords_1), CuArray(coords_2)) ≈ 2.54859467758795u"Å" end + if run_rocm_tests + @test rmsd(ROCArray(coords_1), ROCArray(coords_2)) ≈ 2.54859467758795u"Å" + end bb_atoms = BioStructures.collectatoms(struc[1], BioStructures.backboneselector) coords = SVector{3, Float64}.(eachcol(BioStructures.coordarray(bb_atoms))) / 10 * u"nm" diff --git a/test/minimization.jl b/test/minimization.jl index 0979ea159..c77095ba2 100644 --- a/test/minimization.jl +++ b/test/minimization.jl @@ -43,25 +43,27 @@ atol=1e-4u"kJ * mol^-1") if run_gpu_tests - coords = CuArray([ - SVector(1.0, 1.0, 1.0)u"nm", - SVector(1.6, 1.0, 1.0)u"nm", - SVector(1.4, 1.6, 1.0)u"nm", - ]) - sys = System( - atoms=CuArray([Atom(σ=(0.4 / (2 ^ (1 / 6)))u"nm", ϵ=1.0u"kJ * mol^-1") for i in 1:3]), - pairwise_inters=(LennardJones(),), - coords=coords, - boundary=CubicBoundary(5.0u"nm", 5.0u"nm", 5.0u"nm"), - ) - sim = SteepestDescentMinimizer(tol=1.0u"kJ * mol^-1 * nm^-1") + for AT in gpu_array_types + coords = AT([ + SVector(1.0, 1.0, 1.0)u"nm", + SVector(1.6, 1.0, 1.0)u"nm", + SVector(1.4, 1.6, 1.0)u"nm", + ]) + sys = System( + atoms=AT([Atom(σ=(0.4 / (2 ^ (1 / 6)))u"nm", ϵ=1.0u"kJ * mol^-1") for i in 1:3]), + pairwise_inters=(LennardJones(),), + coords=coords, + boundary=CubicBoundary(5.0u"nm", 5.0u"nm", 5.0u"nm"), + ) + sim = SteepestDescentMinimizer(tol=1.0u"kJ * mol^-1 * nm^-1") - simulate!(sys, sim) - dists = distances(sys.coords, 
sys.boundary) - dists_flat = dists[triu(trues(3, 3), 1)] - @test all(x -> isapprox(x, 0.4u"nm"; atol=1e-3u"nm"), dists_flat) - neighbors = find_neighbors(sys) - @test isapprox(potential_energy(sys, neighbors), -3.0u"kJ * mol^-1"; - atol=1e-4u"kJ * mol^-1") + simulate!(sys, sim) + dists = distances(sys.coords, sys.boundary) + dists_flat = dists[triu(trues(3, 3), 1)] + @test all(x -> isapprox(x, 0.4u"nm"; atol=1e-3u"nm"), dists_flat) + neighbors = find_neighbors(sys) + @test isapprox(potential_energy(sys, neighbors), -3.0u"kJ * mol^-1"; + atol=1e-4u"kJ * mol^-1") + end end end diff --git a/test/protein.jl b/test/protein.jl index f74c9543c..d93e94757 100644 --- a/test/protein.jl +++ b/test/protein.jl @@ -161,53 +161,55 @@ end # Test the same simulation on the GPU if run_gpu_tests - sys = System( - joinpath(data_dir, "6mrr_equil.pdb"), - ff; - velocities=CuArray(deepcopy(velocities_start)), - gpu=true, - center_coords=false, - ) - @test kinetic_energy(sys) ≈ 65521.87288132431u"kJ * mol^-1" - @test temperature(sys) ≈ 329.3202932884933u"K" - - neighbors = find_neighbors(sys) - @test isapprox(potential_energy(sys, neighbors), E_openmm; atol=1e-5u"kJ * mol^-1") - - simulate!(sys, simulator, n_steps) - - coords_diff = Array(sys.coords) .- wrap_coords.(coords_openmm, (sys.boundary,)) - vels_diff = Array(sys.velocities) .- vels_openmm - @test maximum(maximum(abs.(v)) for v in coords_diff) < 1e-9u"nm" - @test maximum(maximum(abs.(v)) for v in vels_diff ) < 1e-6u"nm * ps^-1" - - sys_nounits = System( - joinpath(data_dir, "6mrr_equil.pdb"), - ff_nounits; - velocities=CuArray(deepcopy(ustrip_vec.(velocities_start))), - units=false, - gpu=true, - center_coords=false, - ) - @test kinetic_energy(sys_nounits)u"kJ * mol^-1" ≈ 65521.87288132431u"kJ * mol^-1" - @test temperature(sys_nounits)u"K" ≈ 329.3202932884933u"K" + for AT in gpu_array_types + sys = System( + joinpath(data_dir, "6mrr_equil.pdb"), + ff; + velocities=AT(deepcopy(velocities_start)), + gpu=true, + center_coords=false, 
+ ) + @test kinetic_energy(sys) ≈ 65521.87288132431u"kJ * mol^-1" + @test temperature(sys) ≈ 329.3202932884933u"K" - neighbors_nounits = find_neighbors(sys_nounits) - @test isapprox(potential_energy(sys_nounits, neighbors_nounits) * u"kJ * mol^-1", - E_openmm; atol=1e-5u"kJ * mol^-1") - - simulate!(sys_nounits, simulator_nounits, n_steps) - - coords_diff = Array(sys_nounits.coords * u"nm") .- wrap_coords.(coords_openmm, (sys.boundary,)) - vels_diff = Array(sys_nounits.velocities * u"nm * ps^-1") .- vels_openmm - @test maximum(maximum(abs.(v)) for v in coords_diff) < 1e-9u"nm" - @test maximum(maximum(abs.(v)) for v in vels_diff ) < 1e-6u"nm * ps^-1" - - params_dic_gpu = extract_parameters(sys_nounits, ff_nounits) - @test params_dic == params_dic_gpu - atoms_grad, pis_grad, sis_grad, gis_grad = inject_gradients(sys_nounits, params_dic_gpu) - @test atoms_grad == sys_nounits.atoms - @test pis_grad == sys_nounits.pairwise_inters + neighbors = find_neighbors(sys) + @test isapprox(potential_energy(sys, neighbors), E_openmm; atol=1e-5u"kJ * mol^-1") + + simulate!(sys, simulator, n_steps) + + coords_diff = Array(sys.coords) .- wrap_coords.(coords_openmm, (sys.boundary,)) + vels_diff = Array(sys.velocities) .- vels_openmm + @test maximum(maximum(abs.(v)) for v in coords_diff) < 1e-9u"nm" + @test maximum(maximum(abs.(v)) for v in vels_diff ) < 1e-6u"nm * ps^-1" + + sys_nounits = System( + joinpath(data_dir, "6mrr_equil.pdb"), + ff_nounits; + velocities=AT(deepcopy(ustrip_vec.(velocities_start))), + units=false, + gpu=true, + center_coords=false, + ) + @test kinetic_energy(sys_nounits)u"kJ * mol^-1" ≈ 65521.87288132431u"kJ * mol^-1" + @test temperature(sys_nounits)u"K" ≈ 329.3202932884933u"K" + + neighbors_nounits = find_neighbors(sys_nounits) + @test isapprox(potential_energy(sys_nounits, neighbors_nounits) * u"kJ * mol^-1", + E_openmm; atol=1e-5u"kJ * mol^-1") + + simulate!(sys_nounits, simulator_nounits, n_steps) + + coords_diff = Array(sys_nounits.coords * u"nm") .- 
wrap_coords.(coords_openmm, (sys.boundary,)) + vels_diff = Array(sys_nounits.velocities * u"nm * ps^-1") .- vels_openmm + @test maximum(maximum(abs.(v)) for v in coords_diff) < 1e-9u"nm" + @test maximum(maximum(abs.(v)) for v in vels_diff ) < 1e-6u"nm * ps^-1" + + params_dic_gpu = extract_parameters(sys_nounits, ff_nounits) + @test params_dic == params_dic_gpu + atoms_grad, pis_grad, sis_grad, gis_grad = inject_gradients(sys_nounits, params_dic_gpu) + @test atoms_grad == sys_nounits.atoms + @test pis_grad == sys_nounits.pairwise_inters + end end end diff --git a/test/runtests.jl b/test/runtests.jl index 557e5a1e5..043416188 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Molly using Aqua import BioStructures # Imported to avoid clashing names using CUDA +using AMDGPU using FiniteDifferences using ForwardDiff using Zygote @@ -42,16 +43,37 @@ else @warn "The parallel tests will not be run as Julia is running on 1 thread" end -run_gpu_tests = CUDA.functional() -if run_gpu_tests +run_cuda_tests = CUDA.functional() +if run_cuda_tests device!(parse(Int, DEVICE)) @info "The GPU tests will be run on device $DEVICE" else - @warn "The GPU tests will not be run as a CUDA-enabled device is not available" + @warn "The CUDA tests will not be run as a CUDA-enabled device is not available" end CUDA.allowscalar(false) # Check that we never do scalar indexing on the GPU +run_rocm_tests = AMDGPU.functional() +if run_rocm_tests + AMDGPU.default_device_id!(parse(Int, DEVICE)+1) + @info "The GPU tests will be run on device " * string(parse(Int, DEVICE) + 1) +else + @warn "The ROCM tests will not be run as a ROCM-enabled device is not available" +end + +AMDGPU.allowscalar(false) + +run_gpu_tests = run_cuda_tests || run_rocm_tests +gpu_array_types = [] +if run_gpu_tests + if run_cuda_tests + push!(gpu_array_types, CuArray) + end + if run_rocm_tests + push!(gpu_array_types, ROCArray) + end +end + data_dir = normpath(@__DIR__, "..", "data") ff_dir = 
joinpath(data_dir, "force_fields") diff --git a/test/simulation.jl b/test/simulation.jl index a990606fa..fcbc0e7d1 100644 --- a/test/simulation.jl +++ b/test/simulation.jl @@ -384,35 +384,37 @@ end @testset "Position restraints" begin gpu_list = run_gpu_tests ? (false, true) : (false,) - for gpu in gpu_list - n_atoms = 10 - n_atoms_res = n_atoms ÷ 2 - n_steps = 2_000 - boundary = CubicBoundary(2.0u"nm", 2.0u"nm", 2.0u"nm") - starting_coords = place_atoms(n_atoms, boundary; min_dist=0.3u"nm") - atoms = [Atom(charge=0.0, mass=10.0u"u", σ=0.2u"nm", ϵ=0.2u"kJ * mol^-1") for i in 1:n_atoms] - atoms_data = [AtomData(atom_type=(i <= n_atoms_res ? "A1" : "A2")) for i in 1:n_atoms] - sim = Langevin(dt=0.001u"ps", temperature=300.0u"K", friction=1.0u"ps^-1") - - sys = System( - atoms=gpu ? CuArray(atoms) : atoms, - atoms_data=atoms_data, - pairwise_inters=(LennardJones(),), - coords=gpu ? CuArray(deepcopy(starting_coords)) : deepcopy(starting_coords), - boundary=boundary, - loggers=(coords=CoordinateLogger(100),), - ) + for AT in (run_gpu_tests ? gpu_array_types : [Array]) + for gpu in gpu_list + n_atoms = 10 + n_atoms_res = n_atoms ÷ 2 + n_steps = 2_000 + boundary = CubicBoundary(2.0u"nm", 2.0u"nm", 2.0u"nm") + starting_coords = place_atoms(n_atoms, boundary; min_dist=0.3u"nm") + atoms = [Atom(charge=0.0, mass=10.0u"u", σ=0.2u"nm", ϵ=0.2u"kJ * mol^-1") for i in 1:n_atoms] + atoms_data = [AtomData(atom_type=(i <= n_atoms_res ? "A1" : "A2")) for i in 1:n_atoms] + sim = Langevin(dt=0.001u"ps", temperature=300.0u"K", friction=1.0u"ps^-1") + + sys = System( + atoms=gpu ? AT(atoms) : atoms, + atoms_data=atoms_data, + pairwise_inters=(LennardJones(),), + coords=gpu ? 
AT(deepcopy(starting_coords)) : deepcopy(starting_coords), + boundary=boundary, + loggers=(coords=CoordinateLogger(100),), + ) - atom_selector(at, at_data) = at_data.atom_type == "A1" + atom_selector(at, at_data) = at_data.atom_type == "A1" - sys_res = add_position_restraints(sys, 100_000.0u"kJ * mol^-1 * nm^-2"; - atom_selector=atom_selector) + sys_res = add_position_restraints(sys, 100_000.0u"kJ * mol^-1 * nm^-2"; + atom_selector=atom_selector) - @time simulate!(sys_res, sim, n_steps) + @time simulate!(sys_res, sim, n_steps) - dists = norm.(vector.(starting_coords, Array(sys_res.coords), (boundary,))) - @test maximum(dists[1:n_atoms_res]) < 0.1u"nm" - @test median(dists[(n_atoms_res + 1):end]) > 0.2u"nm" + dists = norm.(vector.(starting_coords, Array(sys_res.coords), (boundary,))) + @test maximum(dists[1:n_atoms_res]) < 0.1u"nm" + @test median(dists[(n_atoms_res + 1):end]) > 0.2u"nm" + end end end @@ -736,7 +738,7 @@ end starting_coords_f32 = [Float32.(c) for c in starting_coords] starting_velocities_f32 = [Float32.(c) for c in starting_velocities] - function test_sim(nl::Bool, parallel::Bool, gpu_diff_safe::Bool, f32::Bool, gpu::Bool) + function test_sim(nl::Bool, parallel::Bool, gpu_diff_safe::Bool, f32::Bool, gpu::Bool, array_type) n_atoms = 400 n_steps = 200 atom_mass = f32 ? 10.0f0u"u" : 10.0u"u" @@ -749,7 +751,7 @@ end InteractionList2Atoms(collect(1:2:n_atoms), collect(2:2:n_atoms), fill("", length(bonds)), - gpu ? CuArray(bonds) : bonds, + gpu ? array_type(bonds) : bonds, ),) neighbor_finder = NoNeighborFinder() @@ -758,7 +760,7 @@ end if nl if gpu_diff_safe neighbor_finder = DistanceVecNeighborFinder( - nb_matrix=gpu ? CuArray(trues(n_atoms, n_atoms)) : trues(n_atoms, n_atoms), + nb_matrix=gpu ? array_type(trues(n_atoms, n_atoms)) : trues(n_atoms, n_atoms), n_steps=10, dist_cutoff=f32 ? 1.5f0u"nm" : 1.5u"nm", ) @@ -774,9 +776,9 @@ end show(devnull, neighbor_finder) if gpu - coords = CuArray(deepcopy(f32 ? 
starting_coords_f32 : starting_coords)) - velocities = CuArray(deepcopy(f32 ? starting_velocities_f32 : starting_velocities)) - atoms = CuArray([Atom(charge=f32 ? 0.0f0 : 0.0, mass=atom_mass, σ=f32 ? 0.2f0u"nm" : 0.2u"nm", + coords = array_type(deepcopy(f32 ? starting_coords_f32 : starting_coords)) + velocities = array_type(deepcopy(f32 ? starting_velocities_f32 : starting_velocities)) + atoms = array_type([Atom(charge=f32 ? 0.0f0 : 0.0, mass=atom_mass, σ=f32 ? 0.2f0u"nm" : 0.2u"nm", ϵ=f32 ? 0.2f0u"kJ * mol^-1" : 0.2u"kJ * mol^-1") for i in 1:n_atoms]) else coords = deepcopy(f32 ? starting_coords_f32 : starting_coords) @@ -808,22 +810,30 @@ end end runs = [ - ("in-place" , [false, false, false, false, false]), - ("in-place NL" , [true , false, false, false, false]), - ("in-place f32" , [false, false, false, true , false]), - ("out-of-place" , [false, false, true , false, false]), - ("out-of-place NL" , [true , false, true , false, false]), - ("out-of-place f32", [false, false, true , true , false]), + ("in-place" , [false, false, false, false, false, Array]), + ("in-place NL" , [true , false, false, false, false, Array]), + ("in-place f32" , [false, false, false, true , false, Array]), + ("out-of-place" , [false, false, true , false, false, Array]), + ("out-of-place NL" , [true , false, true , false, false, Array]), + ("out-of-place f32", [false, false, true , true , false, Array]), ] if run_parallel_tests - push!(runs, ("in-place parallel" , [false, true , false, false, false])) - push!(runs, ("in-place NL parallel", [true , true , false, false, false])) + push!(runs, ("in-place parallel" , [false, true , false, false, false, Array])) + push!(runs, ("in-place NL parallel", [true , true , false, false, false, Array])) end if run_gpu_tests - push!(runs, ("out-of-place gpu" , [false, false, true , false, true ])) - push!(runs, ("out-of-place gpu f32" , [false, false, true , true , true ])) - push!(runs, ("out-of-place gpu NL" , [true , false, true , false, true ])) - 
push!(runs, ("out-of-place gpu f32 NL", [true , false, true , true , true ])) + if run_cuda_tests + push!(runs, ("out-of-place gpu" , [false, false, true , false, true, CuArray])) + push!(runs, ("out-of-place gpu f32" , [false, false, true , true , true, CuArray])) + push!(runs, ("out-of-place gpu NL" , [true , false, true , false, true, CuArray])) + push!(runs, ("out-of-place gpu f32 NL", [true , false, true , true , true, CuArray])) + end + if run_rocm_tests + push!(runs, ("out-of-place rocm" , [false, false, true , false, true, ROCArray])) + push!(runs, ("out-of-place rocm f32" , [false, false, true , true , true, ROCArray])) + push!(runs, ("out-of-place rocm NL" , [true , false, true , false, true, ROCArray])) + push!(runs, ("out-of-place rocm f32 NL", [true , false, true , true , true, ROCArray])) + end end final_coords_ref, E_start_ref = test_sim(runs[1][2]...) diff --git a/test/zygote.jl b/test/zygote.jl index bda9beb3f..4b4465ffd 100644 --- a/test/zygote.jl +++ b/test/zygote.jl @@ -37,7 +37,7 @@ end function test_grad(gpu::Bool, forward::Bool, f32::Bool, pis::Bool, - sis::Bool, obc2::Bool, gbn2::Bool) + sis::Bool, obc2::Bool, gbn2::Bool, array_type) n_atoms = 50 n_steps = 100 atom_mass = f32 ? 10.0f0 : 10.0 @@ -75,7 +75,7 @@ collect(16:30), collect(31:45), fill("", 15), - gpu ? CuArray(angles_inner) : angles_inner, + gpu ? array_type(angles_inner) : angles_inner, ) torsions_inner = [PeriodicTorsion( periodicities=[1, 2, 3], @@ -89,12 +89,12 @@ collect(21:30), collect(31:40), fill("", 10), - gpu ? CuArray(torsions_inner) : torsions_inner, + gpu ? array_type(torsions_inner) : torsions_inner, ) atoms_setup = [Atom(charge=f32 ? 0.0f0 : 0.0, σ=f32 ? 0.0f0 : 0.0) for i in 1:n_atoms] if obc2 imp_obc2 = ImplicitSolventOBC( - gpu ? CuArray(atoms_setup) : atoms_setup, + gpu ? 
array_type(atoms_setup) : atoms_setup, [AtomData(element="O") for i in 1:n_atoms], InteractionList2Atoms(bond_is, bond_js, [""], nothing); use_OBC2=true, @@ -102,7 +102,7 @@ general_inters = (imp_obc2,) elseif gbn2 imp_gbn2 = ImplicitSolventGBN2( - gpu ? CuArray(atoms_setup) : atoms_setup, + gpu ? array_type(atoms_setup) : atoms_setup, [AtomData(element="O") for i in 1:n_atoms], InteractionList2Atoms(bond_is, bond_js, [""], nothing), ) @@ -111,7 +111,7 @@ general_inters = () end neighbor_finder = DistanceVecNeighborFinder( - nb_matrix=gpu ? CuArray(trues(n_atoms, n_atoms)) : trues(n_atoms, n_atoms), + nb_matrix=gpu ? array_type(trues(n_atoms, n_atoms)) : trues(n_atoms, n_atoms), n_steps=10, dist_cutoff=f32 ? 1.5f0 : 1.5, ) @@ -128,18 +128,18 @@ bond_is, bond_js, fill("", length(bonds_inner)), - gpu ? CuArray(bonds_inner) : bonds_inner, + gpu ? array_type(bonds_inner) : bonds_inner, ) cs = deepcopy(forward ? coords_dual : coords) vs = deepcopy(forward ? velocities_dual : velocities) s = System( - atoms=gpu ? CuArray(atoms) : atoms, + atoms=gpu ? array_type(atoms) : atoms, pairwise_inters=pairwise_inters, specific_inter_lists=sis ? (bonds, angles, torsions) : (), general_inters=general_inters, - coords=gpu ? CuArray(cs) : cs, - velocities=gpu ? CuArray(vs) : vs, + coords=gpu ? array_type(cs) : cs, + velocities=gpu ? 
array_type(vs) : vs, boundary=boundary, neighbor_finder=neighbor_finder, gpu_diff_safe=true, @@ -150,30 +150,42 @@ simulate!(s, simulator, n_steps) return mean_min_separation(s.coords, boundary) - end + end return loss end runs = [ # gpu fwd f32 pis sis obc2 gbn2 - ("cpu" , [false, false, false, true , true , false, false], 0.1 , 0.25), - ("cpu forward" , [false, true , false, true , true , false, false], 0.01, 0.01), - ("cpu f32" , [false, false, true , true , true , false, false], 0.2 , 10.0), - ("cpu nospecific" , [false, false, false, true , false, false, false], 0.1 , 0.0 ), - ("cpu nopairwise" , [false, false, false, false, true , false, false], 0.0 , 0.25), - ("cpu obc2" , [false, false, false, true , true , true , false], 0.1 , 0.25), - ("cpu gbn2" , [false, false, false, true , true , false, true ], 0.1 , 0.25), - ("cpu gbn2 forward", [false, true , false, true , true , false, true ], 0.02, 0.02), + ("cpu" , [false, false, false, true , true , false, false, Array], 0.1 , 0.25), + ("cpu forward" , [false, true , false, true , true , false, false, Array], 0.01, 0.01), + ("cpu f32" , [false, false, true , true , true , false, false, Array], 0.2 , 10.0), + ("cpu nospecific" , [false, false, false, true , false, false, false, Array], 0.1 , 0.0 ), + ("cpu nopairwise" , [false, false, false, false, true , false, false, Array], 0.0 , 0.25), + ("cpu obc2" , [false, false, false, true , true , true , false, Array], 0.1 , 0.25), + ("cpu gbn2" , [false, false, false, true , true , false, true , Array], 0.1 , 0.25), + ("cpu gbn2 forward", [false, true , false, true , true , false, true , Array], 0.02, 0.02), ] - if run_gpu_tests # gpu fwd f32 pis sis obc2 gbn2 - push!(runs, ("gpu" , [true , false, false, true , true , false, false], 0.25, 20.0)) - push!(runs, ("gpu forward" , [true , true , false, true , true , false, false], 0.01, 0.01)) - push!(runs, ("gpu f32" , [true , false, true , true , true , false, false], 0.5 , 50.0)) - push!(runs, ("gpu nospecific" , [true , 
false, false, true , false, false, false], 0.25, 0.0 )) - push!(runs, ("gpu nopairwise" , [true , false, false, false, true , false, false], 0.0 , 10.0)) - push!(runs, ("gpu obc2" , [true , false, false, true , true , true , false], 0.25, 20.0)) - push!(runs, ("gpu gbn2" , [true , false, false, true , true , false, true ], 0.25, 20.0)) - push!(runs, ("gpu gbn2 forward", [true , true , false, true , true , false, true ], 0.02, 0.02)) + if run_gpu_tests # gpu fwd f32 pis sis obc2 gbn2 + if run_cuda_tests + push!(runs, ("cuda" , [true , false, false, true , true , false, false, CuArray], 0.25, 20.0)) + push!(runs, ("cuda forward" , [true , true , false, true , true , false, false, CuArray], 0.01, 0.01)) + push!(runs, ("cuda f32" , [true , false, true , true , true , false, false, CuArray], 0.5 , 50.0)) + push!(runs, ("cuda nospecific" , [true , false, false, true , false, false, false, CuArray], 0.25, 0.0 )) + push!(runs, ("cuda nopairwise" , [true , false, false, false, true , false, false, CuArray], 0.0 , 10.0)) + push!(runs, ("cuda obc2" , [true , false, false, true , true , true , false, CuArray], 0.25, 20.0)) + push!(runs, ("cuda gbn2" , [true , false, false, true , true , false, true , CuArray], 0.25, 20.0)) + push!(runs, ("cuda gbn2 forward", [true , true , false, true , true , false, true , CuArray], 0.02, 0.02)) + end + if run_rocm_tests + push!(runs, ("rocm" , [true , false, false, true , true , false, false, ROCArray], 0.25, 20.0)) + push!(runs, ("rocm forward" , [true , true , false, true , true , false, false, ROCArray], 0.01, 0.01)) + push!(runs, ("rocm f32" , [true , false, true , true , true , false, false, ROCArray], 0.5 , 50.0)) + push!(runs, ("rocm nospecific" , [true , false, false, true , false, false, false, ROCArray], 0.25, 0.0 )) + push!(runs, ("rocm nopairwise" , [true , false, false, false, true , false, false, ROCArray], 0.0 , 10.0)) + push!(runs, ("rocm obc2" , [true , false, false, true , true , true , false, ROCArray], 0.25, 20.0)) + 
push!(runs, ("rocm gbn2" , [true , false, false, true , true , false, true , ROCArray], 0.25, 20.0)) + push!(runs, ("rocm gbn2 forward", [true , true , false, true , true , false, true , ROCArray], 0.02, 0.02)) + end end for (name, args, tol_σ, tol_k) in runs