Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .buildkite/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ weakdeps = ["CUDA", "MPI"]
ClimaCommsMPIExt = "MPI"

[[deps.ClimaCore]]
deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "ClimaInterpolations", "CubedSphere", "DataStructures", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LazyBroadcast", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "UnrolledUtilities"]
deps = ["Adapt", "BandedMatrices", "BlockArrays", "ClimaComms", "ClimaInterpolations", "CubedSphere", "DataStructures", "ForwardDiff", "GaussQuadrature", "GilbertCurves", "HDF5", "InteractiveUtils", "IntervalSets", "KrylovKit", "LLVM", "LazyBroadcast", "LinearAlgebra", "MultiBroadcastFusion", "NVTX", "PkgVersion", "RecursiveArrayTools", "RootSolvers", "SparseArrays", "StaticArrays", "Statistics", "UnrolledUtilities"]
path = ".."
uuid = "d414da3d-4745-48bb-8d80-42e94e092884"
version = "0.14.50"
Expand Down
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953"
KrylovKit = "0b1a1467-8014-51b9-945f-bf0ae24f4b77"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LazyBroadcast = "9dccce8e-a116-406d-9fcc-a88ed4f510c8"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MultiBroadcastFusion = "c3c07f87-98de-43f2-a76f-835b330b2cbb"
Expand Down Expand Up @@ -65,6 +66,7 @@ IntervalSets = "0.5, 0.6, 0.7"
JET = "0.9"
Krylov = "0.9, 0.10"
KrylovKit = "0.6, 0.7, 0.8"
LLVM = "9"
LazyBroadcast = "1"
LinearAlgebra = "1"
Logging = "1"
Expand Down
10 changes: 10 additions & 0 deletions docs/src/APIs/datalayouts_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,14 @@ DataLayouts.IHF
DataLayouts.IJHF
DataLayouts.VIHF
DataLayouts.VIJHF
DataLayouts.bitcast_struct
DataLayouts.default_basetype
DataLayouts.check_basetype
DataLayouts.checked_valid_basetype
DataLayouts.num_basetypes
DataLayouts.struct_field_view
DataLayouts.set_struct!
DataLayouts.get_struct
DataLayouts.parent_array_type
DataLayouts.promote_parent_array_type
```
3 changes: 3 additions & 0 deletions docs/src/APIs/utilities_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ CurrentModule = ClimaCore
```@docs
Utilities.PlusHalf
Utilities.half
Utilities.replace_type_parameter
Utilities.fieldtype_vals
Utilities.new
```

## Utilities.Cache
Expand Down
4 changes: 0 additions & 4 deletions examples/column/tvd_advection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ function tendency!(yₜ, y, parameters, t)
method = limiter_method,
)

If = Operators.InterpolateC2F()

if limiter_method == "Zalesak"
@. yₜ.q =
-divf2c(
Expand All @@ -68,8 +66,6 @@ function tendency!(yₜ, y, parameters, t)
),
)
else
Δfluxₕ = @. w * If(y.q)
Δfluxₗ = @. upwind1(w, y.q)
@. yₜ.q =
-divf2c(
upwind1(w, y.q) +
Expand Down
52 changes: 14 additions & 38 deletions ext/cuda/data_layouts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,51 +12,27 @@ import ClimaCore.DataLayouts: fused_copyto!
import Adapt
import CUDA

parent_array_type(::Type{<:CUDA.CuArray{T, N, B} where {N}}) where {T, B} =
CUDA.CuArray{T, N, B} where {N}

# allow on-device use of lazy broadcast objects
# Ensure that all CuArrays have the same memory buffer type.
parent_array_type(
::Type{<:CUDA.CuDeviceArray{T, N, A} where {N}},
) where {T, A} = CUDA.CuDeviceArray{T, N, A} where {N}

# Ensure that both parent array types have the same memory buffer type.
promote_parent_array_type(
::Type{CUDA.CuArray{T1, N, B} where {N}},
::Type{CUDA.CuArray{T2, N, B} where {N}},
) where {T1, T2, B} = CUDA.CuArray{promote_type(T1, T2), N, B} where {N}

# allow on-device use of lazy broadcast objects
::Type{<:CUDA.CuArray{<:Any, <:Any, B}},
::Type{T},
) where {T, B} = CUDA.CuArray{T, <:Any, B}
promote_parent_array_type(
::Type{CUDA.CuDeviceArray{T1, N, B} where {N}},
::Type{CUDA.CuDeviceArray{T2, N, B} where {N}},
) where {T1, T2, B} = CUDA.CuDeviceArray{promote_type(T1, T2), N, B} where {N}
::Type{CUDA.CuArray{T1, <:Any, B}},
::Type{CUDA.CuArray{T2, <:Any, B}},
) where {T1, T2, B} = CUDA.CuArray{promote_type(T1, T2), <:Any, B}

# allow on-device use of lazy broadcast objects with different type params
promote_parent_array_type(
::Type{CUDA.CuDeviceArray{T1, N, B1} where {N}},
::Type{CUDA.CuDeviceArray{T2, N, B2} where {N}},
) where {T1, T2, B1, B2} =
CUDA.CuDeviceArray{promote_type(T1, T2), N, B} where {N, B}

# allow on-device use of lazy broadcast objects with different type params
# Allow on-device use of lazy broadcast objects.
parent_array_type(::Type{<:CUDA.CuDeviceArray}, ::Type{T}) where {T} =
CUDA.CuDeviceArray{T}
promote_parent_array_type(
::Type{CUDA.CuDeviceArray{T1}},
::Type{CUDA.CuDeviceArray{T2, N, B2} where {N}},
) where {T1, T2, B2} =
CUDA.CuDeviceArray{promote_type(T1, T2), N, B} where {N, B}

promote_parent_array_type(
::Type{CUDA.CuDeviceArray{T1, N, B1} where {N}},
::Type{CUDA.CuDeviceArray{T2} where {N}},
) where {T1, T2, B1} =
CUDA.CuDeviceArray{promote_type(T1, T2), N, B} where {N, B}
::Type{CUDA.CuDeviceArray{T2}},
) where {T1, T2} = CUDA.CuDeviceArray{promote_type(T1, T2)}

# Make `similar` accept our special `UnionAll` parent array type for CuArray.
Base.similar(
::Type{CUDA.CuArray{T, N′, B} where {N′}},
dims::Dims{N},
) where {T, N, B} = similar(CUDA.CuArray{T, N, B}, dims)
Base.similar(::Type{CUDA.CuArray{T, <:Any, B}}, dims::Dims{N}) where {T, N, B} =
similar(CUDA.CuArray{T, N, B}, dims)

# Strip a `Val` wrapper: a `Val{x}()` argument yields `x`, while any other
# argument is handed back untouched.
function unval(::Val{CI}) where {CI}
    return CI
end

function unval(CI)
    return CI
end
Expand Down
2 changes: 0 additions & 2 deletions ext/cuda/operators_sem_shmem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ Base.@propagate_inbounds function operator_shmem(
Nq = Quadratures.degrees_of_freedom(QS)
# allocate temp output
RT = operator_return_eltype(op, eltype(arg))
Nf = DataLayouts.typesize(FT, RT)
WJv¹ = CUDA.CuStaticSharedArray(RT, (Nq, Nvt))
return (WJv¹,)
end
Expand All @@ -107,7 +106,6 @@ Base.@propagate_inbounds function operator_shmem(
Nq = Quadratures.degrees_of_freedom(QS)
# allocate temp output
RT = operator_return_eltype(op, eltype(arg))
Nf = DataLayouts.typesize(FT, RT)
WJv¹ = CUDA.CuStaticSharedArray(RT, (Nq, Nq, Nvt))
WJv² = CUDA.CuStaticSharedArray(RT, (Nq, Nq, Nvt))
return (WJv¹, WJv²)
Expand Down
Loading
Loading