diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 28ca591a8c..9e5913c8b5 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -57,13 +57,6 @@ steps: agents: slurm_gpus: 1 - - group: "Unit: RecursiveApply" - steps: - - - label: "Unit: RecursiveApply" - key: unit_recursive_apply - command: "julia --color=yes --check-bounds=yes --project=.buildkite test/RecursiveApply/unit_recursive_apply.jl" - - group: "Unit: CUDA utils" steps: @@ -93,6 +86,10 @@ steps: key: unit_plushalf command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Utilities/unit_plushalf.jl" + - label: "Unit: auto_broadcaster" + key: unit_auto_broadcaster + command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Utilities/unit_auto_broadcaster.jl" + - group: "Unit: DataLayouts" steps: @@ -227,9 +224,9 @@ steps: key: unit_axistensors command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Geometry/axistensors.jl" - - label: "Unit: rmul_with_projection" - key: unit_rmul_with_projection - command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Geometry/rmul_with_projection.jl" + - label: "Unit: mul_with_projection" + key: unit_mul_with_projection + command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Geometry/mul_with_projection.jl" - group: "Unit: Meshes" steps: diff --git a/benchmarks/3d/se_kernels.jl b/benchmarks/3d/se_kernels.jl index 4e8a613981..cc8032892d 100644 --- a/benchmarks/3d/se_kernels.jl +++ b/benchmarks/3d/se_kernels.jl @@ -13,9 +13,7 @@ import ClimaCore: Spaces, Quadratures, Topologies, - DataLayouts, - RecursiveApply - + DataLayouts const C1 = ClimaCore.Geometry.Covariant1Vector const C2 = ClimaCore.Geometry.Covariant2Vector const C3 = ClimaCore.Geometry.Covariant3Vector @@ -25,8 +23,6 @@ const CT123 = Geometry.Contravariant123Vector const ᶜinterp = Operators.InterpolateF2C() const ᶠinterp = Operators.InterpolateC2F() -const ⊞ = RecursiveApply.radd - init_uθ(ϕ, z, R) = 
1.0 / R init_vθ(ϕ, z, R) = 1.0 / R init_w(ϕ, z) = 1.0 diff --git a/benchmarks/bickleyjet/bickleyjet_dg.jl b/benchmarks/bickleyjet/bickleyjet_dg.jl index 2fe4df39a7..052248d4b1 100644 --- a/benchmarks/bickleyjet/bickleyjet_dg.jl +++ b/benchmarks/bickleyjet/bickleyjet_dg.jl @@ -7,9 +7,6 @@ import ClimaCore.Operators using ClimaCore.Geometry import ClimaCore.Geometry: Abstract2DPoint -using ClimaCore.RecursiveApply - - const parameters = ( ϵ = 0.1, # perturbation size for initial condition l = 0.5, # Gaussian width @@ -60,7 +57,7 @@ roe_average(ρ⁻, ρ⁺, var⁻, var⁺) = (sqrt(ρ⁻) * var⁻ + sqrt(ρ⁺) * var⁺) / (sqrt(ρ⁻) + sqrt(ρ⁺)) function roeflux(n, (y⁻, parameters⁻), (y⁺, parameters⁺)) - Favg = RecursiveApply.rdiv(flux(y⁻, parameters⁻) ⊞ flux(y⁺, parameters⁺), 2) + Favg = (flux(y⁻, parameters⁻) + flux(y⁺, parameters⁺)) / 2 λ = sqrt(parameters⁻.g) @@ -115,7 +112,7 @@ function roeflux(n, (y⁻, parameters⁻), (y⁺, parameters⁺)) fluxᵀn_ρθ = ((w1 + w2) * θ + w5) * 0.5 Δf = (ρ = -fluxᵀn_ρ, ρu = -fluxᵀn_ρu, ρθ = -fluxᵀn_ρθ) - RecursiveApply.rmap(f -> f' * n, Favg) ⊞ Δf + return Favg' * n + Δf end function volume!(dydt, y, (parameters,), t) diff --git a/benchmarks/bickleyjet/core_vs_ref.jl b/benchmarks/bickleyjet/core_vs_ref.jl index 3c84bbdf68..a1f57939db 100644 --- a/benchmarks/bickleyjet/core_vs_ref.jl +++ b/benchmarks/bickleyjet/core_vs_ref.jl @@ -30,8 +30,7 @@ for Nq in Nqs volume!(dydt, y0, (parameters,), 0.0) # TODO: move this to volume! dydt_data = Fields.field_values(dydt) - dydt_data .= - RecursiveApply.rdiv.(dydt_data, Spaces.local_geometry_data(space).WJ) + dydt_data ./= Spaces.local_geometry_data(space).WJ # setup reference X = coordinates(Val(Nq), n1, n2) @@ -84,8 +83,7 @@ for Nq in Nqs add_face!(dydt, y0, (parameters,), 0.0) # TODO: move this to volume! 
dydt_data = Fields.field_values(dydt) - dydt_data .= - RecursiveApply.rdiv.(dydt_data, Spaces.local_geometry_data(space).WJ) + dydt_data ./= Spaces.local_geometry_data(space).WJ fill!(dydt_ref, 0.0) add_face_ref!(dydt_ref, y0_ref, (n1, n2, parameters, Val(Nq)), 0.0) diff --git a/docs/make.jl b/docs/make.jl index f359d168cf..8f57d04fc7 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -100,7 +100,6 @@ withenv("GKSwstype" => "nul") do "Limiters" => "APIs/limiters_api.md", "InputOutput" => "APIs/input_output_api.md", "Remapping" => "APIs/remapping_api.md", - "RecursiveApply" => "APIs/recursive_apply_api.md", "Devices" => "APIs/devices_api.md", "DebugOnly" => "APIs/debug_only_api.md", ], diff --git a/docs/src/APIs/geometry_api.md b/docs/src/APIs/geometry_api.md index 6d7fbeb77f..9d2b1d09f1 100644 --- a/docs/src/APIs/geometry_api.md +++ b/docs/src/APIs/geometry_api.md @@ -21,6 +21,7 @@ Geometry.LocalGeometry ```@docs Geometry.Δz_metric_component +Geometry.:⊗ ``` ## Coordinates diff --git a/docs/src/APIs/recursive_apply_api.md b/docs/src/APIs/recursive_apply_api.md deleted file mode 100644 index ef38087dda..0000000000 --- a/docs/src/APIs/recursive_apply_api.md +++ /dev/null @@ -1,9 +0,0 @@ -# RecursiveApply - -```@meta -CurrentModule = ClimaCore -``` - -```@docs -RecursiveApply -``` diff --git a/docs/src/APIs/utilities_api.md b/docs/src/APIs/utilities_api.md index ac1f0b91c3..201112b1ff 100644 --- a/docs/src/APIs/utilities_api.md +++ b/docs/src/APIs/utilities_api.md @@ -5,11 +5,30 @@ CurrentModule = ClimaCore ``` ```@docs -Utilities.PlusHalf -Utilities.half +Utilities.unionall_type Utilities.replace_type_parameter Utilities.fieldtype_vals Utilities.new +Utilities.unsafe_eltype +Utilities.safe_eltype +``` + +## Utilities.PlusHalf + +```@docs +Utilities.PlusHalf +Utilities.half +``` + +## Utilities.AutoBroadcaster + +```@docs +Utilities.AutoBroadcaster +Utilities.is_auto_broadcastable +Utilities.add_auto_broadcasters +Utilities.drop_auto_broadcasters 
+Utilities.auto_broadcasted +Utilities.nested_broadcast ``` ## Utilities.Cache diff --git a/docs/src/geometry.md b/docs/src/geometry.md index 483704dfa3..ef5fb3d450 100644 --- a/docs/src/geometry.md +++ b/docs/src/geometry.md @@ -6,7 +6,5 @@ CurrentModule = ClimaCore.Geometry ```@docs mul_with_projection -rmul_with_projection mul_return_type -rmul_return_type ``` diff --git a/docs/src/matrix_fields.md b/docs/src/matrix_fields.md index 6fb0a3136e..c9a90b2f6b 100644 --- a/docs/src/matrix_fields.md +++ b/docs/src/matrix_fields.md @@ -210,15 +210,11 @@ J = MatrixFields.FieldMatrix((@name(f), @name(g))=> ∂f_∂g) ## Optimizations -Each entry of a `FieldMatrix` can be a `ColumnwiseBandMatrixField`, a `DiagonalMatrixRow`, or an -`UniformScaling`. - -A `ColumnwiseBandMatrixField` is a `Field` with a `BandMatrixRow` at each point. It is intended -to represent a collection of banded matrices, where there is one band matrix for each column -of the space the `Field` is on. Beyond only storing the diagonals of the band matrix, an `entry` -can be optimized to use less memory. Each optimized representation can be indexed equivalently to -non optimized representations, and used in addition, subtraction, matrix-vector multiplication, -Matrix-matrix multiplication, `RecursiveApply`, and `FieldMatrixSolver`. +Each entry of a `FieldMatrix` can be a `ColumnwiseBandMatrixField`, a `DiagonalMatrixRow`, or a +`UniformScaling`. A `ColumnwiseBandMatrixField` is a `Field` with a `BandMatrixRow` at each point. +It represents a collection of banded matrices, with each column of the `Field` corresponding to a +specific matrix. If all columns correspond to a constant multiple of the identity matrix, the `Field` +may be replaced with a `ScalingFieldMatrixEntry` (i.e., a `DiagonalMatrixRow` or `UniformScaling`). For the following sections, `space` is a column space with $N_v$ levels. A column space is used for simplicity in this example, but the optimizations work with any space with columns. 
diff --git a/docs/src/operators.md b/docs/src/operators.md index f75470fccc..b3b6a25d8b 100644 --- a/docs/src/operators.md +++ b/docs/src/operators.md @@ -112,8 +112,6 @@ column_accumulate! ## Internal APIs ```@docs -getidx_return_type -stencil_return_type return_eltype return_space stencil_interior_width diff --git a/examples/bickleyjet/bickleyjet_dg.jl b/examples/bickleyjet/bickleyjet_dg.jl index 7b729c1668..052be0b5eb 100644 --- a/examples/bickleyjet/bickleyjet_dg.jl +++ b/examples/bickleyjet/bickleyjet_dg.jl @@ -7,12 +7,10 @@ import ClimaCore: Geometry, Meshes, Operators, - RecursiveApply, Spaces, Quadratures, Topologies import ClimaCore.Geometry: ⊗ -import ClimaCore.RecursiveApply: ⊞, rdiv, rmap using OrdinaryDiffEqSSPRK: ODEProblem, solve, SSPRK33 @@ -106,8 +104,6 @@ roe_average(ρ⁻, ρ⁺, var⁻, var⁺) = (sqrt(ρ⁻) * var⁻ + sqrt(ρ⁺) * var⁺) / (sqrt(ρ⁻) + sqrt(ρ⁺)) function roeflux(n, (y⁻, parameters⁻), (y⁺, parameters⁺)) - Favg = rdiv(flux(y⁻, parameters⁻) ⊞ flux(y⁺, parameters⁺), 2) - λ = sqrt(parameters⁻.g) ρ⁻, ρu⁻, ρθ⁻ = y⁻.ρ, y⁻.ρu, y⁻.ρθ @@ -159,9 +155,11 @@ function roeflux(n, (y⁻, parameters⁻), (y⁺, parameters⁺)) (w1 * (u - c * n) + w2 * (u + c * n) + w3 * u + w4 * (Δu - Δuₙ * n)) * 0.5 fluxᵀn_ρθ = ((w1 + w2) * θ + w5) * 0.5 - Δf = (ρ = -fluxᵀn_ρ, ρu = -fluxᵀn_ρu, ρθ = -fluxᵀn_ρθ) - rmap(f -> f' * n, Favg) ⊞ Δf + + return map(flux(y⁻, parameters⁻), flux(y⁺, parameters⁺), Δf) do F⁻, F⁺, Δf + ((F⁻ + F⁺) / 2)' * n + Δf + end end @@ -205,9 +203,8 @@ function rhs!(dydt, y, (parameters, numflux), t) end # 6. 
Solve for final result - dydt_data = Fields.field_values(dydt) - dydt_data .= - RecursiveApply.rdiv.(dydt_data, Spaces.local_geometry_data(space).WJ) + dydt_data = + Fields.field_values(dydt) ./ Spaces.local_geometry_data(space).WJ M = Quadratures.cutoff_filter_matrix( Float64, Spaces.quadrature_style(space), diff --git a/examples/hybrid/sphere/deformation_flow.jl b/examples/hybrid/sphere/deformation_flow.jl index 5b8107f5c7..29f0414ac7 100644 --- a/examples/hybrid/sphere/deformation_flow.jl +++ b/examples/hybrid/sphere/deformation_flow.jl @@ -62,16 +62,15 @@ ode_algorithm = ExplicitAlgorithm(SSP33ShuOsher()) const hdiv = Operators.Divergence() const hwdiv = Operators.WeakDivergence() const hgrad = Operators.Gradient() -const If2c = Operators.InterpolateF2C() -const Ic2f = Operators.InterpolateC2F( +const interp = Operators.InterpolateC2F( bottom = Operators.Extrapolate(), top = Operators.Extrapolate(), ) -const ᶠwinterp = Operators.WeightedInterpolateC2F( +const winterp = Operators.WeightedInterpolateC2F( bottom = Operators.Extrapolate(), top = Operators.Extrapolate(), ) -const vdivf2c = Operators.DivergenceF2C( +const vdiv = Operators.DivergenceF2C( top = Operators.SetValue(Geometry.Contravariant3Vector(FT(0))), bottom = Operators.SetValue(Geometry.Contravariant3Vector(FT(0))), ) @@ -97,10 +96,6 @@ const LinVanLeerFlux = Operators.LinVanLeerC2F( top = Operators.FirstOrderOneSided(), constraint = Operators.MonotoneLocalExtrema(), ) -const FCTBorisBook = Operators.FCTBorisBook( - bottom = Operators.FirstOrderOneSided(), - top = Operators.FirstOrderOneSided(), -) # Reference pressure and density p(z) = p_0 * exp(-z / H) @@ -145,75 +140,52 @@ end function horizontal_tendency!(Yₜ, Y, cache, t) (; u, Δₕq) = cache - coord = Fields.coordinate_field(u) + coord = Fields.coordinate_field(Y.c) @. u = local_velocity(coord, t) @. Δₕq = hwdiv(hgrad(Y.c.ρq / Y.c.ρ)) Spaces.weighted_dss!(Δₕq) @. 
Yₜ.c.ρ = -hdiv(Y.c.ρ * u) - for n in 1:5 # TODO: update RecursiveApply/Operators to eliminate this loop - ρq_n = Y.c.ρq.:($n) - ρqₜ_n = Yₜ.c.ρq.:($n) - @. ρqₜ_n = -hdiv(ρq_n * u) - end - @. Yₜ.c.ρq -= D₄ * hwdiv(Y.c.ρ * hgrad(Δₕq)) + @. Yₜ.c.ρq = -hdiv(Y.c.ρq * u) - D₄ * hwdiv(Y.c.ρ * hgrad(Δₕq)) end function vertical_tendency!(Yₜ, Y, cache, t) - (; q_n, face_u, face_uₕ, face_uᵥ, fct_op, dt) = cache + (; q, face_ρ, face_u, fct_op, dt) = cache + (; J) = Fields.local_geometry_field(Y.c) face_coord = Fields.coordinate_field(face_u) + @. q = Y.c.ρq / Y.c.ρ + @. face_ρ = winterp(J, Y.c.ρ) @. face_u = local_velocity(face_coord, t) - @. face_uₕ = Geometry.project(Geometry.Covariant12Axis(), face_u) - @. face_uᵥ = Geometry.project(Geometry.Covariant3Axis(), face_u) - @. Yₜ.c.ρ = -vdivf2c(Ic2f(Y.c.ρ) * face_u) - ᶜJ = Fields.local_geometry_field(axes(Y.c.ρ)).J - for n in 1:5 # TODO: update RecursiveApply/Operators to eliminate this loop - ρq_n = Y.c.ρq.:($n) - ρqₜ_n = Yₜ.c.ρq.:($n) - @. q_n = ρq_n / Y.c.ρ - @. ρqₜ_n = -vdivf2c(Ic2f(ρq_n) * face_uₕ) - if isnothing(fct_op) - @. ρqₜ_n -= vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * face_uᵥ * Ic2f(q_n)) - elseif fct_op == upwind1 - @. ρqₜ_n -= vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * upwind1(face_uᵥ, q_n)) - elseif fct_op == upwind3 - @. ρqₜ_n -= vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * upwind3(face_uᵥ, q_n)) - elseif fct_op == FCTBorisBook - @. ρqₜ_n -= vdivf2c( - ᶠwinterp(ᶜJ, Y.c.ρ) * ( - upwind1(face_uᵥ, q_n) + FCTBorisBook( - upwind3(face_uᵥ, q_n) - upwind1(face_uᵥ, q_n), - q_n / dt - - vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * upwind1(face_uᵥ, q_n)) / Y.c.ρ, - ) + @. Yₜ.c.ρ = -vdiv(face_ρ * face_u) + if isnothing(fct_op) + @. Yₜ.c.ρq = -vdiv(face_ρ * face_u * interp(q)) + elseif fct_op == upwind1 + @. Yₜ.c.ρq = -vdiv(face_ρ * upwind1(face_u, q)) + elseif fct_op == upwind3 + @. Yₜ.c.ρq = -vdiv(face_ρ * upwind3(face_u, q)) + elseif fct_op == FCTZalesak + @. 
Yₜ.c.ρq = + -vdiv( + face_ρ * upwind1(face_u, q) + + FCTZalesak( + face_ρ * (upwind3(face_u, q) - upwind1(face_u, q)), + q / dt, + q / dt - vdiv(face_ρ * upwind1(face_u, q)) / Y.c.ρ, ), ) - elseif fct_op == FCTZalesak - @. ρqₜ_n -= vdivf2c( - ᶠwinterp(ᶜJ, Y.c.ρ) * ( - upwind1(face_uᵥ, q_n) + FCTZalesak( - upwind3(face_uᵥ, q_n) - upwind1(face_uᵥ, q_n), - q_n / dt, - q_n / dt - - vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * upwind1(face_uᵥ, q_n)) / Y.c.ρ, - ) + elseif fct_op == SlopeLimitedFlux + @. Yₜ.c.ρq = + -vdiv( + face_ρ * upwind1(face_u, q) + + SlopeLimitedFlux( + face_ρ * (upwind3(face_u, q) - upwind1(face_u, q)), + q / dt, + face_u, ), ) - elseif fct_op == SlopeLimitedFlux - @. ρqₜ_n -= vdivf2c( - ᶠwinterp(ᶜJ, Y.c.ρ) * ( - upwind1(face_uᵥ, q_n) + SlopeLimitedFlux( - upwind3(face_uᵥ, q_n) - upwind1(face_uᵥ, q_n), - q_n / dt, - face_uᵥ, - ) - ), - ) - elseif fct_op == LinVanLeerFlux - @. ρqₜ_n -= - vdivf2c(ᶠwinterp(ᶜJ, Y.c.ρ) * LinVanLeerFlux(face_uᵥ, q_n, dt)) - else - error("unrecognized FCT operator $fct_op") - end + elseif fct_op == LinVanLeerFlux + @. Yₜ.c.ρq = -vdiv(face_ρ * LinVanLeerFlux(face_u, q, dt)) + else + error("unrecognized FCT operator $fct_op") end end @@ -289,11 +261,10 @@ function run_deformation_flow(use_limiter, fct_op, dt) cache = (; u = Fields.Field(Geometry.UVWVector{FT}, cent_space), + q = Fields.Field(NTuple{5, FT}, cent_space), Δₕq = Fields.Field(NTuple{5, FT}, cent_space), - q_n = Fields.Field(FT, cent_space), + face_ρ = Fields.Field(FT, face_space), face_u = Fields.Field(Geometry.UVWVector{FT}, face_space), - face_uₕ = Fields.Field(Geometry.Covariant12Vector{FT}, face_space), - face_uᵥ = Fields.Field(Geometry.Covariant3Vector{FT}, face_space), limiter = use_limiter ? 
Limiters.QuasiMonotoneLimiter(Y.c.ρq) : nothing, fct_op, dt, @@ -305,12 +276,7 @@ function run_deformation_flow(use_limiter, fct_op, dt) (0, t_end), cache, ) - sol = solve( - problem, - ode_algorithm; - dt, - saveat = collect(0.0:(t_end / 2):t_end), - ) + sol = solve(problem, ode_algorithm; dt) if !(cache.limiter isa Nothing) @show cache.limiter.rtol Limiters.print_convergence_stats(cache.limiter) @@ -318,157 +284,117 @@ function run_deformation_flow(use_limiter, fct_op, dt) return sol end -function conservation_errors(sol) - initial_total_mass = sum(sol.u[1].c.ρ) - initial_tracer_masses = map(n -> sum(sol.u[1].c.ρq.:($n)), 1:5) - final_total_mass = sum(sol.u[end].c.ρ) - final_tracer_masses = map(n -> sum(sol.u[end].c.ρq.:($n)), 1:5) - return ( - (final_total_mass - initial_total_mass) / initial_total_mass, - (final_tracer_masses .- initial_tracer_masses) ./ initial_tracer_masses, - ) +function total_conservation_error(sol) + initial_mass = sum(sol[1].c.ρ) + final_mass = sum(sol[end].c.ρ) + return abs(final_mass - initial_mass) / initial_mass end -# Roughness is measure as a deviation from the mean value -tracer_roughnesses(sol) = - map(1:5) do n - q_n = sol.u[end].c.ρq.:($n) ./ sol.u[end].c.ρ - mean_q_n = mean(q_n) # TODO: replace the mean with a low-pass filter - return mean(abs.(q_n .- mean_q_n)) - end +function tracer_conservation_errors(sol) + initial_masses = sum(sol[1].c.ρq) + final_masses = sum(sol[end].c.ρq) + return abs.(final_masses .- initial_masses) ./ initial_masses +end -tracer_ranges(sol) = - map(1:5) do n - q_n = sol.u[end].c.ρq.:($n) ./ sol.u[end].c.ρ - return maximum(q_n) - minimum(q_n) - end +# Roughness measured as deviation from mean (TODO: use a low-pass filter instead) +function tracer_roughnesses(sol) + final_q = sol[end].c.ρq ./ sol[end].c.ρ + return mean(abs.(final_q .- mean(final_q))) +end + +function tracer_ranges(sol) + final_q = sol[end].c.ρq ./ sol[end].c.ρ + return maximum(final_q) .- minimum(final_q) +end -@info "Slope Limited 
Solutions" -tvd_sol = run_deformation_flow(false, SlopeLimitedFlux, _dt) -lim_tvd_sol = run_deformation_flow(true, SlopeLimitedFlux, _dt) -@info "vanLeer Flux Solutions" -lvl_sol = run_deformation_flow(false, LinVanLeerFlux, _dt) -lim_lvl_sol = run_deformation_flow(true, LinVanLeerFlux, _dt) -@info "Third-Order Upwind Solutions" -third_upwind_sol = run_deformation_flow(false, upwind3, _dt) -lim_third_upwind_sol = run_deformation_flow(true, upwind3, _dt) -@info "Zalesak Flux-Corrected Transport Solutions" -fct_sol = run_deformation_flow(false, FCTZalesak, _dt) -lim_fct_sol = run_deformation_flow(true, FCTZalesak, _dt) -@info "First-Order Upwind Solutions" -lim_first_upwind_sol = run_deformation_flow(true, upwind1, _dt) -lim_centered_sol = run_deformation_flow(true, nothing, _dt) - -third_upwind_ρ_err, third_upwind_ρq_errs = conservation_errors(third_upwind_sol) -fct_ρ_err, fct_ρq_errs = conservation_errors(fct_sol) -lim_third_upwind_ρ_err, lim_third_upwind_ρq_errs = - conservation_errors(lim_third_upwind_sol) -lim_fct_ρ_err, lim_fct_ρq_errs = conservation_errors(lim_fct_sol) -lim_first_upwind_ρ_err, lim_first_upwind_ρq_errs = - conservation_errors(lim_first_upwind_sol) -lim_centered_ρ_err, lim_centered_ρq_errs = conservation_errors(lim_centered_sol) - -# Check that the conservation errors are not too big. -max_err = 64 * eps(FT) -@test abs(third_upwind_ρ_err) < max_err -@test all(abs.(third_upwind_ρq_errs) .< max_err) -@test all(abs.(fct_ρq_errs) .< max_err) -@test all(abs.(lim_third_upwind_ρq_errs) .< max_err) -@test all(abs.(lim_fct_ρq_errs) .< max_err) -@test all(abs.(lim_first_upwind_ρ_err) .< max_err) -@test all(abs.(lim_centered_ρq_errs) .< max_err) - -# Check that the different upwinding modes with the limiter have no effect on ρ. 
-@test third_upwind_ρ_err == - fct_ρ_err == - lim_third_upwind_ρ_err == - lim_fct_ρ_err == - lim_first_upwind_ρ_err == - lim_centered_ρ_err - -# Check that the different upwinding modes with the limiter have no effect on the tracer with q = 1, or at -# least no effect up to round-off error. -max_q5_roundoff_err = 2 * eps(FT) -@test third_upwind_ρq_errs[5] ≈ third_upwind_ρ_err atol = max_q5_roundoff_err -@test fct_ρq_errs[5] ≈ third_upwind_ρ_err atol = max_q5_roundoff_err -@test lim_third_upwind_ρq_errs[5] ≈ third_upwind_ρ_err atol = - max_q5_roundoff_err -@test lim_fct_ρq_errs[5] ≈ third_upwind_ρ_err atol = max_q5_roundoff_err -@test lim_first_upwind_ρq_errs[5] ≈ third_upwind_ρ_err atol = - max_q5_roundoff_err -@test lim_centered_ρq_errs[5] ≈ third_upwind_ρ_err atol = max_q5_roundoff_err - -compare_tracer_props(a, b; buffer = 1) = all( - x -> x[1] < x[2] * buffer || (x[1] ≤ 100eps() && x[2] ≤ 100eps()), - zip(a, b), +@info "Centered Differences" +centered_sol_no_lim = run_deformation_flow(false, nothing, _dt) +centered_sol_with_lim = run_deformation_flow(true, nothing, _dt) +@info "First-Order Upwinding" +upwind1_sol_no_lim = run_deformation_flow(false, upwind1, _dt) +upwind1_sol_with_lim = run_deformation_flow(true, upwind1, _dt) +@info "Third-Order Upwinding" +upwind3_sol_no_lim = run_deformation_flow(false, upwind3, _dt) +upwind3_sol_with_lim = run_deformation_flow(true, upwind3, _dt) +@info "Flux-Corrected Transport" +fct_sol_no_lim = run_deformation_flow(false, FCTZalesak, _dt) +fct_sol_with_lim = run_deformation_flow(true, FCTZalesak, _dt) +@info "Slope-Limited Transport" +tvd_sol_no_lim = run_deformation_flow(false, SlopeLimitedFlux, _dt) +tvd_sol_with_lim = run_deformation_flow(true, SlopeLimitedFlux, _dt) +@info "van Leer Transport" +lvl_sol_no_lim = run_deformation_flow(false, LinVanLeerFlux, _dt) +lvl_sol_with_lim = run_deformation_flow(true, LinVanLeerFlux, _dt) + +sols_no_lim = (; + centered = centered_sol_no_lim, + upwind1 = upwind1_sol_no_lim, + 
upwind3 = upwind3_sol_no_lim, + fct = fct_sol_no_lim, + tvd = tvd_sol_no_lim, + lvl = lvl_sol_no_lim, ) +sols_with_lim = (; + centered = centered_sol_with_lim, + upwind1 = upwind1_sol_with_lim, + upwind3 = upwind3_sol_with_lim, + fct = fct_sol_with_lim, + tvd = tvd_sol_with_lim, + lvl = lvl_sol_with_lim, +) + +ρ_errs_no_lim = map(total_conservation_error, sols_no_lim) +ρ_errs_with_lim = map(total_conservation_error, sols_with_lim) +ρq_errs_no_lim = map(tracer_conservation_errors, sols_no_lim) +ρq_errs_with_lim = map(tracer_conservation_errors, sols_with_lim) +roughnesses_no_lim = map(tracer_roughnesses, sols_no_lim) +roughnesses_with_lim = map(tracer_roughnesses, sols_with_lim) +ranges_no_lim = map(tracer_ranges, sols_no_lim) +ranges_with_lim = map(tracer_ranges, sols_with_lim) + +# Check that upwinding has no effect on total mass. +for ρ_errs_data in (ρ_errs_no_lim, ρ_errs_with_lim), ρ_err in ρ_errs_data + @test ρ_err == ρ_errs_no_lim.centered +end + +# Check that upwinding has no effect on the constant tracer q5, and that the +# other non-constant tracers are all conserved, accounting for round-off errors. +for ρq_errs_data in (ρq_errs_no_lim, ρq_errs_with_lim), ρq_errs in ρq_errs_data + @test ρq_errs[5] ≈ ρ_errs_no_lim.centered atol = eps(FT) + @test all(ρq_errs[1:4] .< 40 * eps(FT)) +end + +# Check that using a limiter improves the "smoothness" of non-constant tracers. +for (no_lim, with_lim) in zip(roughnesses_no_lim, roughnesses_with_lim) + @test all(with_lim[1:4] .< no_lim[1:4] .* 0.9999) +end +for (no_lim, with_lim) in zip(ranges_no_lim, ranges_with_lim) + @test all(with_lim[1:4] .< no_lim[1:4] .* 0.992) +end -# Check that the different upwinding modes with the limiter improve the "smoothness" of the tracers. -#! 
format: off -@testset "Test tracer properties" begin - @test compare_tracer_props(tracer_roughnesses(fct_sol) , tracer_roughnesses(third_upwind_sol); buffer = 1.0) - @test compare_tracer_props(tracer_roughnesses(lim_third_upwind_sol), tracer_roughnesses(third_upwind_sol); buffer = 1.0) - @test compare_tracer_props(tracer_roughnesses(lim_fct_sol) , tracer_roughnesses(third_upwind_sol); buffer = 0.93) - @test compare_tracer_props(tracer_ranges(fct_sol) , tracer_ranges(third_upwind_sol); buffer = 1.0) - @test compare_tracer_props(tracer_ranges(lim_third_upwind_sol) , tracer_ranges(third_upwind_sol); buffer = 1.2) - @test compare_tracer_props(tracer_ranges(lim_fct_sol) , tracer_ranges(third_upwind_sol); buffer = 1.0) - @test compare_tracer_props(tracer_ranges(lim_first_upwind_sol) , tracer_ranges(third_upwind_sol); buffer = 0.6) - @test compare_tracer_props(tracer_ranges(lim_centered_sol) , tracer_ranges(third_upwind_sol); buffer = 1.3) +# Check that the relative effects of different upwinding schemes are consistent. +for data in (roughnesses_no_lim, roughnesses_with_lim, ranges_no_lim, ranges_with_lim) + @test all((data.upwind1 .< data.tvd .< data.lvl .< data.fct .< data.upwind3)[1:4]) end -#! 
format: on ENV["GKSwstype"] = "nul" using ClimaCorePlots, Plots Plots.GRBackend() path = joinpath(@__DIR__, "output", "deformation_flow") mkpath(path) -for (sol, suffix) in ( - (lim_centered_sol, "_lim_centered"), - (lim_first_upwind_sol, "_lim_first_upwind"), - (third_upwind_sol, "_third_upwind"), - (fct_sol, "_fct"), - (tvd_sol, "_tvd"), - (lvl_sol, "_lvl"), - (lim_third_upwind_sol, "_lim_third_upwind"), - (lim_fct_sol, "_lim_fct"), - (lim_tvd_sol, "_lim_tvd"), - (lim_lvl_sol, "_lim_lvl"), -) - for (sol_index, day) in ((1, 6), (2, 12)) + +ref_final_q3 = upwind3_sol_with_lim[end].c.ρq.:3 ./ upwind3_sol_with_lim[end].c.ρ +for (lim_suffix, sols) in (("no_lim", sols_no_lim), ("with_lim", sols_with_lim)) + for (name, sol) in pairs(sols) + final_q3 = sol[end].c.ρq.:3 ./ sol[end].c.ρ Plots.png( - Plots.plot( - sol.u[sol_index].c.ρq.:3 ./ sol.u[sol_index].c.ρ, - level = 15, - clim = (-1, 1), - ), - joinpath(path, "q3_day$day$suffix.png"), + Plots.plot(final_q3, level = 15, clim = (-1, 1)), + joinpath(path, "q3_day12_$(name)_$(lim_suffix).png"), ) - end -end - -for (sol, suffix) in ( - (lim_centered_sol, "_lim_centered"), - (lim_first_upwind_sol, "_lim_first_upwind"), - (third_upwind_sol, "_third_upwind"), - (fct_sol, "_fct"), - (tvd_sol, "_tvd"), - (lvl_sol, "_lvl"), - (lim_fct_sol, "_lim_fct"), - (lim_lvl_sol, "_lim_lvl"), -) - for (sol_index, day) in ((1, 6), (2, 12)) + sol === upwind3_sol_with_lim && continue # skip diff plot for reference Plots.png( - Plots.plot( - ( - ((sol.u[sol_index].c.ρq.:3) ./ sol.u[sol_index].c.ρ) .- ( - lim_third_upwind_sol[sol_index].c.ρq.:3 ./ - lim_third_upwind_sol[sol_index].c.ρ - ) - ), - level = 15, - clim = (-1, 1), - ), - joinpath(path, "q3_day_diff_$day$suffix.png"), + Plots.plot(final_q3 .- ref_final_q3, level = 15, clim = (-0.2, 0.2)), + joinpath(path, "q3_diff_day12_$(name)_$(lim_suffix).png"), ) end end diff --git a/ext/ClimaCoreCUDAExt.jl b/ext/ClimaCoreCUDAExt.jl index c30283a099..2c5551b087 100644 --- a/ext/ClimaCoreCUDAExt.jl 
+++ b/ext/ClimaCoreCUDAExt.jl @@ -15,10 +15,7 @@ import ClimaCore.DataLayouts: mapreduce_cuda import ClimaCore.DataLayouts: ToCUDA import ClimaCore.DataLayouts: NoMask, IJHMask import ClimaCore.DataLayouts: slab, column -import ClimaCore.Utilities: half -import ClimaCore.Utilities: cart_ind, linear_ind -import ClimaCore.RecursiveApply: - ⊠, ⊞, ⊟, radd, rmul, rsub, rdiv, rmap, rzero, rmin, rmax +import ClimaCore.Utilities: half, new, cart_ind, linear_ind import ClimaCore.DataLayouts: get_N, get_Nv, get_Nij, get_Nij, get_Nh import ClimaCore.DataLayouts: UniversalSize diff --git a/ext/cuda/column_matrix_helpers.jl b/ext/cuda/column_matrix_helpers.jl index 443ddb832a..6466a80733 100644 --- a/ext/cuda/column_matrix_helpers.jl +++ b/ext/cuda/column_matrix_helpers.jl @@ -19,15 +19,15 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - 1i32 - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd < li || v + pd > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d + half <= CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + half + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -55,15 +55,15 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd < li || v + pd > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, 
(ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d - half < CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d - half + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -91,15 +91,15 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - 1i32 - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd < li || v + pd > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d <= CUDA.blockDim().x - 1i32) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -129,14 +129,14 @@ Base.@propagate_inbounds function row_mul_mat!( li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd < li || v + pd > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d <= CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -164,15 +164,15 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = 
UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd + half < li || v + pd + half > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d + half <= CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + half + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -200,15 +200,15 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd + half < li || v + pd + half > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d - half < CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d - half + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -236,14 +236,14 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd + half < li || v + pd + half > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d <= CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + (i - 1i32) * CUDA.blockDim().x][pd - 
mat1_row_d] else zero_entry @@ -271,14 +271,14 @@ Base.@propagate_inbounds function row_mul_mat!( pd1, pd2 = MatrixFields.outer_diagonals(prod_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(eltype(prod_eltype)) + zero_entry = zero(eltype(prod_eltype)) prod_entries = UnrolledUtilities.unrolled_map((pd1:pd2...,)) do pd if v + pd + half < li || v + pd + half > ri zero_entry else - UnrolledUtilities.unrolled_mapreduce(⊞, (ld1:ud1...,)) do mat1_row_d + UnrolledUtilities.unrolled_mapreduce(+, (ld1:ud1...,)) do mat1_row_d if ld2 <= pd - mat1_row_d <= ud2 && (0i32 < v + mat1_row_d < CUDA.blockDim().x) - @inbounds mat1_row[mat1_row_d] ⊠ + @inbounds mat1_row[mat1_row_d] * matrix2[v + mat1_row_d + (i - 1i32) * CUDA.blockDim().x][pd - mat1_row_d] else zero_entry @@ -307,9 +307,9 @@ Base.@propagate_inbounds function row_mul_vec!( ld1, ud1 = MatrixFields.outer_diagonals(mat1_eltype) li = 1i32 ri = CUDA.blockDim().x - 1i32 - zero_entry = rzero(prod_eltype) + zero_entry = zero(prod_eltype) return UnrolledUtilities.unrolled_mapreduce( - ⊞, + +, ld1:ud1; init = zero_entry, ) do mat1_row_d @@ -338,9 +338,9 @@ Base.@propagate_inbounds function row_mul_vec!( ld1, ud1 = MatrixFields.outer_diagonals(mat1_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(prod_eltype) + zero_entry = zero(prod_eltype) return UnrolledUtilities.unrolled_mapreduce( - ⊞, + +, ld1:ud1; init = zero_entry, ) do mat1_row_d @@ -369,9 +369,9 @@ Base.@propagate_inbounds function row_mul_vec!( ld1, ud1 = MatrixFields.outer_diagonals(mat1_eltype) li = 1i32 ri = CUDA.blockDim().x - 1i32 - zero_entry = rzero(prod_eltype) + zero_entry = zero(prod_eltype) return UnrolledUtilities.unrolled_mapreduce( - ⊞, + +, ld1:ud1; init = zero_entry, ) do mat1_row_d @@ -400,9 +400,9 @@ Base.@propagate_inbounds function row_mul_vec!( ld1, ud1 = MatrixFields.outer_diagonals(mat1_eltype) li = 1i32 ri = CUDA.blockDim().x - zero_entry = rzero(prod_eltype) + zero_entry = zero(prod_eltype) return 
UnrolledUtilities.unrolled_mapreduce( - ⊞, + +, ld1:ud1; init = zero_entry, ) do mat1_row_d @@ -420,23 +420,9 @@ end # Handles multiplication in row_mul_vec!. # Basically rmul, but some operators matrices require special handling # general case -Base.@propagate_inbounds outer_or_mul(x::T1, y::T2) where {T1, T2} = x ⊠ y +Base.@propagate_inbounds outer_or_mul(x::T1, y::T2) where {T1, T2} = x * y # case for grad of a vec Base.@propagate_inbounds outer_or_mul(x::T1, y::T2) where {T1 <: AbstractVector, T2} = x ⊗ y -Base.@propagate_inbounds outer_or_mul( - x::T1, - y::T2, -) where {T1, T2 <: Union{Tuple, NamedTuple}} = - RecursiveApply.rmap(Base.Fix1(outer_or_mul, x), y) -Base.@propagate_inbounds outer_or_mul( - x::T1, - y::T2, -) where {T1 <: Union{Tuple, NamedTuple}, T2 <: Union{Tuple, NamedTuple}} = x ⊠ y -Base.@propagate_inbounds outer_or_mul( - x::T1, - y::T2, -) where {T1 <: AbstractVector, T2 <: Union{Tuple, NamedTuple}} = - RecursiveApply.rmap(Base.Fix1(outer_or_mul, x), y) # case for divgrad of a vec Base.@propagate_inbounds outer_or_mul( x::T1, diff --git a/ext/cuda/limiters.jl b/ext/cuda/limiters.jl index dc20ef5c4b..642dfe40ca 100644 --- a/ext/cuda/limiters.jl +++ b/ext/cuda/limiters.jl @@ -23,8 +23,12 @@ function compute_element_bounds!( ρ, dev::ClimaComms.CUDADevice, ) - ρ_values = Fields.field_values(Operators.strip_space(ρ, axes(ρ))) - ρq_values = Fields.field_values(Operators.strip_space(ρq, axes(ρq))) + ρ_values = Base.broadcastable( + Fields.field_values(Operators.strip_space(ρ, axes(ρ))), + ) + ρq_values = Base.broadcastable( + Fields.field_values(Operators.strip_space(ρq, axes(ρq))), + ) (_, _, _, Nv, Nh) = DataLayouts.universal_size(ρ_values) nthreads, nblocks = config_threadblock(Nv, Nh) @@ -53,13 +57,13 @@ function compute_element_bounds_kernel!(limiter, ρq, ρ) slab_ρ = slab(ρ, v, h) for j in 1:Nj for i in 1:Ni - q = rdiv(slab_ρq[slab_index(i, j)], slab_ρ[slab_index(i, j)]) + q = slab_ρq[slab_index(i, j)] / slab_ρ[slab_index(i, j)] if i == 1 && j 
== 1 q_min = q q_max = q else - q_min = rmin(q_min, q) - q_max = rmax(q_max, q) + q_min = min(q_min, q) + q_max = max(q_max, q) end end end @@ -107,7 +111,8 @@ function compute_neighbor_bounds_local_kernel!( tidx = thread_index() @inbounds if valid_range(tidx, prod(n)) (v, h) = kernel_indexes(tidx, n).I - (; q_bounds, q_bounds_nbr, ghost_buffer, rtol) = limiter + (; q_bounds_nbr, ghost_buffer, rtol) = limiter + q_bounds = Base.broadcastable(limiter.q_bounds) slab_q_bounds = slab(q_bounds, v, h) q_min = slab_q_bounds[slab_index(1)] q_max = slab_q_bounds[slab_index(2)] @@ -115,8 +120,8 @@ function compute_neighbor_bounds_local_kernel!( local_neighbor_elem_offset[h]:(local_neighbor_elem_offset[h + 1] - 1) h_nbr = local_neighbor_elem[lne] slab_q_bounds = slab(q_bounds, v, h_nbr) - q_min = rmin(q_min, slab_q_bounds[slab_index(1)]) - q_max = rmax(q_max, slab_q_bounds[slab_index(2)]) + q_min = min(q_min, slab_q_bounds[slab_index(1)]) + q_max = max(q_max, slab_q_bounds[slab_index(2)]) end slab_q_bounds_nbr = slab(q_bounds_nbr, v, h) slab_q_bounds_nbr[slab_index(1)] = q_min diff --git a/ext/cuda/matrix_fields_multiple_field_solve.jl b/ext/cuda/matrix_fields_multiple_field_solve.jl index c1bb2abde7..e573f3cc9e 100644 --- a/ext/cuda/matrix_fields_multiple_field_solve.jl +++ b/ext/cuda/matrix_fields_multiple_field_solve.jl @@ -15,8 +15,8 @@ NVTX.@annotate function multiple_field_solve!( x, A, b, - x1, ) + x1 = first(values(x)) names = MatrixFields.matrix_row_keys(keys(A)) Nnames = length(names) Ni, Nj, _, _, Nh = size(Fields.field_values(x1)) @@ -29,13 +29,12 @@ NVTX.@annotate function multiple_field_solve!( xs = map(name -> ssx[name], names) As = map(name -> ssA[name, name], names) bs = map(name -> ssb[name], names) - x1 = first(xs) device = ClimaComms.device(x[first(names)]) us = UniversalSize(Fields.field_values(x1)) cart_inds = cartesian_indices_multiple_field_solve(us; Nnames) - args = (device, caches, xs, As, bs, x1, us, mask, cart_inds, Val(Nnames)) + args = (device, 
caches, xs, As, bs, us, mask, cart_inds, Val(Nnames)) nitems = Ni * Nj * Nh * Nnames (; threads, blocks) = config_via_occupancy(multiple_field_solve_kernel!, nitems, args) @@ -46,7 +45,7 @@ NVTX.@annotate function multiple_field_solve!( blocks_s = blocks, always_inline = true, ) - call_post_op_callback() && post_op_callback(x, dev, cache, x, A, b, x1) + call_post_op_callback() && post_op_callback(x, dev, cache, x, A, b) end Base.@propagate_inbounds column_A(A::UniformScaling, i, j, h) = A @@ -83,7 +82,6 @@ function multiple_field_solve_kernel!( xs, As, bs, - x1, us::UniversalSize, mask, cart_inds, diff --git a/ext/cuda/matrix_fields_single_field_solve.jl b/ext/cuda/matrix_fields_single_field_solve.jl index b32000f190..fbb2ed5f42 100644 --- a/ext/cuda/matrix_fields_single_field_solve.jl +++ b/ext/cuda/matrix_fields_single_field_solve.jl @@ -11,7 +11,6 @@ import ClimaCore.DataLayouts: vindex import ClimaCore.MatrixFields: single_field_solve! import ClimaCore.MatrixFields: _single_field_solve! 
import ClimaCore.MatrixFields: band_matrix_solve!, unzip_tuple_field_values -import ClimaCore.RecursiveApply: ⊠, ⊞, ⊟, rmap, rzero, rdiv function single_field_solve!(device::ClimaComms.CUDADevice, cache, x, A, b) Ni, Nj, _, _, Nh = size(Fields.field_values(A)) @@ -73,7 +72,7 @@ function _single_field_solve_diag_matrix_row!( b_data = Fields.field_values(b) Nv = DataLayouts.nlevels(x_data) @inbounds for v in 1:Nv - x_data[vi(v)] = inv(A₀[vi(v)]) ⊠ b_data[vi(v)] + x_data[vi(v)] = inv(A₀[vi(v)]) * b_data[vi(v)] end end @@ -108,7 +107,7 @@ function _single_field_solve!( b_data = Fields.field_values(b) Nv = DataLayouts.nlevels(x_data) @inbounds for v in 1:Nv - x_data[vindex(v)] = inv(A.λ) ⊠ b_data[vindex(v)] + x_data[vindex(v)] = inv(A.λ) * b_data[vindex(v)] end end @@ -121,7 +120,7 @@ function _single_field_solve!( ) x_data = Fields.field_values(x) b_data = Fields.field_values(b) - x_data[] = inv(A.λ) ⊠ b_data[] + x_data[] = inv(A.λ) * b_data[] end using StaticArrays: MArray @@ -207,7 +206,7 @@ function band_matrix_solve_local_mem!( Nv = DataLayouts.nlevels(x) (A₀,) = Aⱼs @inbounds for v in 1:Nv - x[vindex(v)] = inv(A₀[vindex(v)]) ⊠ b[vindex(v)] + x[vindex(v)] = inv(A₀[vindex(v)]) * b[vindex(v)] end return nothing end diff --git a/ext/cuda/operators_fd_eager.jl b/ext/cuda/operators_fd_eager.jl index 60580b05f4..df5c04e9d4 100644 --- a/ext/cuda/operators_fd_eager.jl +++ b/ext/cuda/operators_fd_eager.jl @@ -2,10 +2,9 @@ import ClimaCore: Spaces, Quadratures, Topologies, Operators import Base.Broadcast: Broadcasted import ClimaCore.Fields: Field, field_values, AbstractFieldStyle import ClimaComms -import ClimaCore.Utilities: half +import ClimaCore.Utilities: half, new import ClimaCore.Operators import ClimaCore.Geometry: ⊗, project -import ClimaCore.RecursiveApply: rzero, ⊞, ⊠, rmuladd, rmap import ClimaCore.Operators: StencilBroadcasted, setidx!, getidx, reconstruct_placeholder_space import ClimaCore.MatrixFields: FaceToCenter, CenterToFace, Square, CenterToCenter, @@ 
-263,7 +262,7 @@ Base.@propagate_inbounds function calc_level_val( CUDA.sync_threads() # if the output is on centers, the CUDA.blockDim().xth thread can just return 0 mat1_space.staggering isa Spaces.CellCenter && v == CUDA.blockDim().x && - return new_struct(eltype(bc)) + return new(eltype(bc)) if mat1_space.staggering isa Spaces.CellCenter mat1_shape = eltype(ClimaCore.MatrixFields.outer_diagonals(typeof(mat1_row))) <: @@ -308,7 +307,7 @@ Base.@propagate_inbounds function calc_level_val( h = blockIdx().z hidx = (i, j, h) if space.staggering isa Spaces.CellCenter - v == CUDA.blockDim().x && return @inline @inbounds new_struct(eltype(bc)) + v == CUDA.blockDim().x && return @inline @inbounds new(eltype(bc)) end li = space.staggering isa Spaces.CellCenter ? 1i32 : half idx = v - 1i32 + li @@ -355,7 +354,7 @@ Base.@propagate_inbounds function calc_level_val( h = blockIdx().z hidx = (i, j, h) if space.staggering isa Spaces.CellCenter - v == CUDA.blockDim().x && return @inline @inbounds new_struct(eltype(bc)) + v == CUDA.blockDim().x && return @inline @inbounds new(eltype(bc)) end li = space.staggering isa Spaces.CellCenter ? 1i32 : half idx = v - 1i32 + li @@ -366,7 +365,7 @@ end calc_level_val(f::Field, space) Returns the value of the field `f` at the thread's index. 
-When the staggering of `space` is `CellCenter`, the thread with `v == CUDA.blockDim().x` returns `new_struct(eltype(f))` +When the staggering of `space` is `CellCenter`, the thread with `v == CUDA.blockDim().x` returns `new(eltype(f))` """ Base.@propagate_inbounds function calc_level_val( arg::F, @@ -380,7 +379,7 @@ Base.@propagate_inbounds function calc_level_val( if space isa Union{Spaces.ExtrudedFiniteDifferenceSpace, Spaces.FiniteDifferenceSpace} && space.staggering isa Spaces.CellCenter - v == CUDA.blockDim().x && return @inline @inbounds new_struct(eltype(data)) + v == CUDA.blockDim().x && return @inline @inbounds new(eltype(data)) end return @inline @inbounds data[CartesianIndex(i, j, 1i32, v, h)] end @@ -421,7 +420,7 @@ Base.@propagate_inbounds function get_op_row(op, args, space) outputs_to_face = space.staggering isa ClimaCore.Grids.CellFace row_type = @inbounds @inline op_matrix_row_type(op, FT, args[1:(end - 1)]...) if !outputs_to_face && v == CUDA.blockDim().x - return new_struct(row_type) + return new(row_type) end v_half = outputs_to_face ? v - half : v in_left_bnd = Operators.should_call_left_boundary(v_half, space, op, nothing) @@ -484,7 +483,7 @@ Base.@propagate_inbounds function project_row2_for_mul(mat1_row, mat2_row, space project_onto = ClimaCore.Geometry.recursively_find_dual_axes_for_projection(mat1_et) if space.staggering isa Spaces.CellCenter && v == CUDA.blockDim().x - lg = new_struct(Spaces.local_geometry_type(typeof(space))) + lg = new(Spaces.local_geometry_type(typeof(space))) else v_maybe_half = space.staggering isa Spaces.CellFace ? v - half : v @inbounds lg = Geometry.LocalGeometry(space, v_maybe_half, hidx) @@ -503,20 +502,14 @@ end Recursively project `y` onto the axes in `projection_tuple[1]` using the local geometry in `projection_tuple[2]`. 
""" -Base.@propagate_inbounds recursively_project( - projection_tuple::T, - y::Y, -) where {T, Y <: BandMatrixRow} = map(Base.Fix1(recursively_project, projection_tuple), y) Base.@propagate_inbounds recursively_project(projection_tuple::T, y::Y) where {T, Y} = - rmap(Base.Fix1(recursively_project, projection_tuple), y) + map(Base.Fix1(recursively_project, projection_tuple), y) Base.@propagate_inbounds recursively_project( projection_tuple::T, y::Y, ) where {T, Y <: AxisTensor} = @inbounds @inline project(projection_tuple[1], y, projection_tuple[2]) -@generated new_struct(::Type{T}) where {T} = Expr(:new, :T) - if hasfield(Method, :recursion_relation) dont_limit = (args...) -> true for m in methods(recursively_project) diff --git a/ext/cuda/operators_fd_shmem.jl b/ext/cuda/operators_fd_shmem.jl index da1d3ffdd9..7fb8f3f317 100644 --- a/ext/cuda/operators_fd_shmem.jl +++ b/ext/cuda/operators_fd_shmem.jl @@ -1,8 +1,7 @@ -import ClimaCore: DataLayouts, Spaces, Geometry, RecursiveApply, DataLayouts +import ClimaCore: DataLayouts, Spaces, Geometry, DataLayouts import CUDA import ClimaCore.Operators: return_eltype, get_local_geometry import ClimaCore.Geometry: ⊗ -import ClimaCore.RecursiveApply: ⊟, ⊞ Base.@propagate_inbounds function fd_operator_shmem( space, @@ -75,7 +74,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( if !on_boundary(idx, space, op) Ju³₋ = Ju³[vt] # corresponds to idx - half Ju³₊ = Ju³[vt + 1] # corresponds to idx + half - return (Ju³₊ ⊟ Ju³₋) ⊠ lg.invJ + return (Ju³₊ - Ju³₋) * lg.invJ else bloc = on_left_boundary(idx, space, op) ? 
@@ -87,7 +86,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( if bc isa Operators.SetValue Ju³₋ = lJu³[1] # corresponds to idx - half Ju³₊ = Ju³[vt + 1] # corresponds to idx + half - return (Ju³₊ ⊟ Ju³₋) ⊠ lg.invJ + return (Ju³₊ - Ju³₋) * lg.invJ else # @assert bc isa Operators.SetDivergence return lJu³[1] @@ -97,7 +96,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( if bc isa Operators.SetValue Ju³₋ = Ju³[vt] # corresponds to idx - half Ju³₊ = rJu³[1] # corresponds to idx + half - return (Ju³₊ ⊟ Ju³₋) ⊠ lg.invJ + return (Ju³₊ - Ju³₋) * lg.invJ else @assert bc isa Operators.SetDivergence return rJu³[1] @@ -174,7 +173,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( if !on_boundary(idx, space, op) u₋ = u[vt - 1] # corresponds to idx - half u₊ = u[vt] # corresponds to idx + half - return u₊ ⊟ u₋ + return u₊ - u₋ else bloc = on_left_boundary(idx, space, op) ? @@ -186,14 +185,14 @@ Base.@propagate_inbounds function fd_operator_evaluate( if bc isa Operators.SetValue u₋ = 2 * lb[1] # corresponds to idx - half u₊ = 2 * u[vt] # corresponds to idx + half - return u₊ ⊟ u₋ + return u₊ - u₋ end else @assert on_right_boundary(idx, space) if bc isa Operators.SetValue u₋ = 2 * u[vt - 1] # corresponds to idx - half u₊ = 2 * rb[1] # corresponds to idx + half - return u₊ ⊟ u₋ + return u₊ - u₋ end end end @@ -273,7 +272,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( if !on_boundary(idx, space, op) u₋ = u[ᶜidx - 1] # corresponds to idx - half u₊ = u[ᶜidx] # corresponds to idx + half - return RecursiveApply.rdiv(u₊ ⊞ u₋, 2) + return (u₊ + u₋) / 2 else bloc = on_left_boundary(idx, space, op) ? 
@@ -289,7 +288,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( elseif bc isa Operators.SetGradient u₋ = lb[1] # corresponds to idx - half u₊ = u[ᶜidx] # corresponds to idx + half - return u₊ ⊟ RecursiveApply.rdiv(u₋, 2) + return u₊ - u₋ / 2 else @assert bc isa Operators.Extrapolate return u[ᶜidx] @@ -301,7 +300,7 @@ Base.@propagate_inbounds function fd_operator_evaluate( elseif bc isa Operators.SetGradient u₋ = u[ᶜidx - 1] # corresponds to idx - half u₊ = rb[1] # corresponds to idx + half - return u₋ ⊞ RecursiveApply.rdiv(u₊, 2) + return u₋ + u₊ / 2 else @assert bc isa Operators.Extrapolate return u[ᶜidx - 1] diff --git a/ext/cuda/operators_fd_shmem_common.jl b/ext/cuda/operators_fd_shmem_common.jl index 6aded04ead..cb978a084e 100644 --- a/ext/cuda/operators_fd_shmem_common.jl +++ b/ext/cuda/operators_fd_shmem_common.jl @@ -1,4 +1,4 @@ -import ClimaCore: DataLayouts, Spaces, Geometry, RecursiveApply, DataLayouts +import ClimaCore: DataLayouts, Spaces, Geometry, DataLayouts import CUDA import ClimaCore.Operators: return_eltype, get_local_geometry import ClimaCore.Operators: getidx @@ -9,18 +9,11 @@ import ClimaCore.Utilities ##### Boundary helpers ##### -@inline has_left_boundary(space, op) = - Operators.has_boundary(op, Operators.left_boundary_window(space)) -@inline has_right_boundary(space, op) = - Operators.has_boundary(op, Operators.right_boundary_window(space)) - @inline on_boundary(idx, space, op) = on_left_boundary(idx, space, op) || on_right_boundary(idx, space, op) -@inline on_left_boundary(idx, space, op) = - has_left_boundary(space, op) && on_left_boundary(idx, space) -@inline on_right_boundary(idx, space, op) = - has_right_boundary(space, op) && on_right_boundary(idx, space) +@inline on_left_boundary(idx, space, op) = on_left_boundary(idx, space) +@inline on_right_boundary(idx, space, op) = on_right_boundary(idx, space) @inline on_boundary(idx::PlusHalf, space) = idx == Operators.left_face_boundary_idx(space) || @@ -40,8 +33,7 @@ import 
ClimaCore.Utilities idx == Operators.right_center_boundary_idx(space) @inline on_any_boundary(idx, space, op) = - (has_left_boundary(space, op) && on_left_boundary(idx, space)) || - has_right_boundary(space, op) && on_right_boundary(idx, space) + on_left_boundary(idx, space) || on_right_boundary(idx, space) @inline function is_out_of_bounds(idx::Integer, space) ᶜspace = Spaces.center_space(space) diff --git a/ext/cuda/operators_sem_shmem.jl b/ext/cuda/operators_sem_shmem.jl index 5a5a17ef03..6e8f308931 100644 --- a/ext/cuda/operators_sem_shmem.jl +++ b/ext/cuda/operators_sem_shmem.jl @@ -1,4 +1,4 @@ -import ClimaCore: DataLayouts, Spaces, Geometry, RecursiveApply, DataLayouts +import ClimaCore: DataLayouts, Spaces, Geometry, DataLayouts import CUDA import ClimaCore.Operators: Divergence, @@ -51,11 +51,8 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = get_local_geometry(space, ij, slabidx) i, _ = ij.I - Jv¹[i, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) + (; J) = local_geometry + Jv¹[i, vt] = J * Geometry.contravariant1(arg, local_geometry) end Base.@propagate_inbounds function operator_fill_shmem!( op::Divergence{(1, 2)}, @@ -68,17 +65,9 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = get_local_geometry(space, ij, slabidx) i, j = ij.I - - Jv¹[i, j, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) - Jv²[i, j, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - arg, - ) + (; J) = local_geometry + Jv¹[i, j, vt] = J * Geometry.contravariant1(arg, local_geometry) + Jv²[i, j, vt] = J * Geometry.contravariant2(arg, local_geometry) end Base.@propagate_inbounds function operator_shmem( @@ -122,11 +111,8 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = 
get_local_geometry(space, ij, slabidx) i, _ = ij.I - WJv¹[i, vt] = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) + (; WJ) = local_geometry + WJv¹[i, vt] = WJ * Geometry.contravariant1(arg, local_geometry) end Base.@propagate_inbounds function operator_fill_shmem!( op::WeakDivergence{(1, 2)}, @@ -139,17 +125,9 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = get_local_geometry(space, ij, slabidx) i, j = ij.I - - WJv¹[i, j, vt] = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - arg, - ) - WJv²[i, j, vt] = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - arg, - ) + (; WJ) = local_geometry + WJv¹[i, j, vt] = WJ * Geometry.contravariant1(arg, local_geometry) + WJv²[i, j, vt] = WJ * Geometry.contravariant2(arg, local_geometry) end Base.@propagate_inbounds function operator_shmem( @@ -198,11 +176,8 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = get_local_geometry(space, ij, slabidx) i, _ = ij.I - Ju1[i, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant1(u, local_geometry), - arg1, - ) + (; J) = local_geometry + Ju1[i, vt] = J * Geometry.contravariant1(arg1, local_geometry) psi[i, vt] = arg2 end @@ -218,16 +193,9 @@ Base.@propagate_inbounds function operator_fill_shmem!( vt = threadIdx().z local_geometry = get_local_geometry(space, ij, slabidx) i, j = ij.I - Ju1[i, j, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant1(u, local_geometry), - arg1, - ) - Ju2[i, j, vt] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant2(u, local_geometry), - arg1, - ) + (; J) = local_geometry + Ju1[i, j, vt] = J * Geometry.contravariant1(arg1, local_geometry) + Ju2[i, j, vt] = J * Geometry.contravariant2(arg1, local_geometry) psi[i, j, vt] = arg2 end @@ -318,7 +286,7 @@ 
Base.@propagate_inbounds function operator_fill_shmem!( local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ i, _ = ij.I - Wf[i, vt] = W ⊠ arg + Wf[i, vt] = W * arg end Base.@propagate_inbounds function operator_fill_shmem!( op::WeakGradient{(1, 2)}, @@ -332,7 +300,7 @@ Base.@propagate_inbounds function operator_fill_shmem!( local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ i, j = ij.I - Wf[i, j, vt] = W ⊠ arg + Wf[i, j, vt] = W * arg end Base.@propagate_inbounds function operator_shmem( @@ -515,14 +483,14 @@ Base.@propagate_inbounds function operator_fill_shmem!( RT = operator_return_eltype(op, typeof(arg)) if RT <: Geometry.Contravariant3Vector _, Wv₂ = work - Wv₂[i, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) + Wv₂[i, vt] = W * Geometry.covariant2(arg, local_geometry) elseif RT <: Geometry.Contravariant2Vector (Wv₃,) = work - Wv₃[i, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + Wv₃[i, vt] = W * Geometry.covariant3(arg, local_geometry) else _, Wv₂, Wv₃ = work - Wv₂[i, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) - Wv₃[i, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + Wv₂[i, vt] = W * Geometry.covariant2(arg, local_geometry) + Wv₃[i, vt] = W * Geometry.covariant3(arg, local_geometry) end end Base.@propagate_inbounds function operator_fill_shmem!( @@ -540,15 +508,15 @@ Base.@propagate_inbounds function operator_fill_shmem!( RT = operator_return_eltype(op, typeof(arg)) if RT <: Geometry.Contravariant3Vector Wv₁, Wv₂ = work - Wv₁[i, j, vt] = W ⊠ Geometry.covariant1(arg, local_geometry) - Wv₂[i, j, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) + Wv₁[i, j, vt] = W * Geometry.covariant1(arg, local_geometry) + Wv₂[i, j, vt] = W * Geometry.covariant2(arg, local_geometry) elseif RT <: Geometry.Contravariant12Vector (Wv₃,) = work - Wv₃[i, j, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + Wv₃[i, j, vt] = W * Geometry.covariant3(arg, 
local_geometry) else Wv₁, Wv₂, Wv₃ = work - Wv₁[i, j, vt] = W ⊠ Geometry.covariant1(arg, local_geometry) - Wv₂[i, j, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) - Wv₃[i, j, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + Wv₁[i, j, vt] = W * Geometry.covariant1(arg, local_geometry) + Wv₂[i, j, vt] = W * Geometry.covariant2(arg, local_geometry) + Wv₃[i, j, vt] = W * Geometry.covariant3(arg, local_geometry) end end diff --git a/ext/cuda/operators_spectral_element.jl b/ext/cuda/operators_spectral_element.jl index a849f76f06..4988793a84 100644 --- a/ext/cuda/operators_spectral_element.jl +++ b/ext/cuda/operators_spectral_element.jl @@ -1,5 +1,5 @@ import ClimaCore: Spaces, Quadratures, Topologies -import ClimaCore: Operators, Geometry, Quadratures, RecursiveApply +import ClimaCore: Operators, Geometry, Quadratures import ClimaComms using CUDA import ClimaCore.Operators: AbstractSpectralStyle, strip_space @@ -198,11 +198,11 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = get_local_geometry(space, ij, slabidx) - DJv = D[i, 1] ⊠ Jv¹[1, vt] + DJv = D[i, 1] * Jv¹[1, vt] for k in 2:Nq - DJv = DJv ⊞ D[i, k] ⊠ Jv¹[k, vt] + DJv += D[i, k] * Jv¹[k, vt] end - return RecursiveApply.rmul(DJv, local_geometry.invJ) + return DJv * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( op::Divergence{(1, 2)}, @@ -221,14 +221,14 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = get_local_geometry(space, ij, slabidx) - DJv = D[i, 1] ⊠ Jv¹[1, j, vt] + DJv = D[i, 1] * Jv¹[1, j, vt] for k in 2:Nq - DJv = DJv ⊞ D[i, k] ⊠ Jv¹[k, j, vt] + DJv += D[i, k] * Jv¹[k, j, vt] end for k in 1:Nq - DJv = DJv ⊞ D[j, k] ⊠ Jv²[i, k, vt] + DJv += D[j, k] * Jv²[i, k, vt] end - return RecursiveApply.rmul(DJv, local_geometry.invJ) + return DJv * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( @@ -248,11 +248,11 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = 
get_local_geometry(space, ij, slabidx) - Dᵀ₁WJv¹ = D[1, i] ⊠ WJv¹[1, vt] + Dᵀ₁WJv¹ = D[1, i] * WJv¹[1, vt] for k in 2:Nq - Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k, i] ⊠ WJv¹[k, vt] + Dᵀ₁WJv¹ += D[k, i] * WJv¹[k, vt] end - return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹, local_geometry.WJ)) + return -Dᵀ₁WJv¹ / local_geometry.WJ end Base.@propagate_inbounds function operator_evaluate( op::WeakDivergence{(1, 2)}, @@ -271,13 +271,13 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = get_local_geometry(space, ij, slabidx) - Dᵀ₁WJv¹ = D[1, i] ⊠ WJv¹[1, j, vt] - Dᵀ₂WJv² = D[1, j] ⊠ WJv²[i, 1, vt] + Dᵀ₁WJv¹ = D[1, i] * WJv¹[1, j, vt] + Dᵀ₂WJv² = D[1, j] * WJv²[i, 1, vt] for k in 2:Nq - Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k, i] ⊠ WJv¹[k, j, vt] - Dᵀ₂WJv² = Dᵀ₂WJv² ⊞ D[k, j] ⊠ WJv²[i, k, vt] + Dᵀ₁WJv¹ += D[k, i] * WJv¹[k, j, vt] + Dᵀ₂WJv² += D[k, j] * WJv²[i, k, vt] end - return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹ ⊞ Dᵀ₂WJv², local_geometry.WJ)) + return -(Dᵀ₁WJv¹ + Dᵀ₂WJv²) / local_geometry.WJ end Base.@propagate_inbounds function operator_evaluate( @@ -294,20 +294,17 @@ Base.@propagate_inbounds function operator_evaluate( QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) - RT = Geometry.rmul_return_type(eltype(Ju1), eltype(psi)) + RT = Geometry.mul_return_type(eltype(Ju1), eltype(psi)) local_geometry = get_local_geometry(space, ij, slabidx) result = zero(RT) for j in 1:Nq j == i && continue - F1 = RecursiveApply.rdiv( - (Ju1[i, vt] ⊞ Ju1[j, vt]) ⊠ (psi[i, vt] ⊞ psi[j, vt]), - 2, - ) - result = result ⊞ D[i, j] ⊠ F1 + result += + D[i, j] * (Ju1[i, vt] + Ju1[j, vt]) * (psi[i, vt] + psi[j, vt]) / 2 end - return result ⊠ local_geometry.invJ + return result * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( op::SplitDivergence{(1, 2)}, @@ -323,28 +320,24 @@ Base.@propagate_inbounds function operator_evaluate( QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) D = 
Quadratures.differentiation_matrix(FT, QS) - RT = Geometry.rmul_return_type(eltype(Ju1), eltype(psi)) + RT = Geometry.mul_return_type(eltype(Ju1), eltype(psi)) local_geometry = get_local_geometry(space, ij, slabidx) result = zero(RT) for k in 1:Nq k == i && continue - F1 = RecursiveApply.rdiv( - (Ju1[i, j, vt] ⊞ Ju1[k, j, vt]) ⊠ (psi[i, j, vt] ⊞ psi[k, j, vt]), - 2, - ) - result = result ⊞ D[i, k] ⊠ F1 + result += + D[i, k] * + (Ju1[i, j, vt] + Ju1[k, j, vt]) * (psi[i, j, vt] + psi[k, j, vt]) / 2 end for k in 1:Nq k == j && continue - F2 = RecursiveApply.rdiv( - (Ju2[i, j, vt] ⊞ Ju2[i, k, vt]) ⊠ (psi[i, j, vt] ⊞ psi[i, k, vt]), - 2, - ) - result = result ⊞ D[j, k] ⊠ F2 + result += + D[j, k] * + (Ju2[i, j, vt] + Ju2[i, k, vt]) * (psi[i, j, vt] + psi[i, k, vt]) / 2 end - return result ⊠ local_geometry.invJ + return result * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( @@ -363,9 +356,9 @@ Base.@propagate_inbounds function operator_evaluate( D = Quadratures.differentiation_matrix(FT, QS) @inbounds begin - ∂f∂ξ₁ = D[i, 1] ⊠ input[1, vt] + ∂f∂ξ₁ = D[i, 1] * input[1, vt] for k in 2:Nq - ∂f∂ξ₁ = ∂f∂ξ₁ ⊞ D[i, k] ⊠ input[k, vt] + ∂f∂ξ₁ += D[i, k] * input[k, vt] end end if eltype(input) <: Number @@ -394,11 +387,11 @@ Base.@propagate_inbounds function operator_evaluate( D = Quadratures.differentiation_matrix(FT, QS) @inbounds begin - ∂f∂ξ₁ = D[i, 1] ⊠ input[1, j, vt] - ∂f∂ξ₂ = D[j, 1] ⊠ input[i, 1, vt] + ∂f∂ξ₁ = D[i, 1] * input[1, j, vt] + ∂f∂ξ₂ = D[j, 1] * input[i, 1, vt] for k in 2:Nq - ∂f∂ξ₁ = ∂f∂ξ₁ ⊞ D[i, k] ⊠ input[k, j, vt] - ∂f∂ξ₂ = ∂f∂ξ₂ ⊞ D[j, k] ⊠ input[i, k, vt] + ∂f∂ξ₁ += D[i, k] * input[k, j, vt] + ∂f∂ξ₂ += D[j, k] * input[i, k, vt] end end if eltype(input) <: Number @@ -431,11 +424,11 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Dᵀ₁Wf = D[1, i] ⊠ Wf[1, vt] + Dᵀ₁Wf = D[1, i] * Wf[1, vt] for k in 2:Nq - Dᵀ₁Wf = Dᵀ₁Wf ⊞ 
D[k, i] ⊠ Wf[k, vt] + Dᵀ₁Wf += D[k, i] * Wf[k, vt] end - return Geometry.Covariant1Vector(⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W))) + return Geometry.Covariant1Vector(-Dᵀ₁Wf) / W end Base.@propagate_inbounds function operator_evaluate( op::WeakGradient{(1, 2)}, @@ -455,16 +448,13 @@ Base.@propagate_inbounds function operator_evaluate( local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Dᵀ₁Wf = D[1, i] ⊠ Wf[1, j, vt] - Dᵀ₂Wf = D[1, j] ⊠ Wf[i, 1, vt] + Dᵀ₁Wf = D[1, i] * Wf[1, j, vt] + Dᵀ₂Wf = D[1, j] * Wf[i, 1, vt] for k in 2:Nq - Dᵀ₁Wf = Dᵀ₁Wf ⊞ D[k, i] ⊠ Wf[k, j, vt] - Dᵀ₂Wf = Dᵀ₂Wf ⊞ D[k, j] ⊠ Wf[i, k, vt] + Dᵀ₁Wf += D[k, i] * Wf[k, j, vt] + Dᵀ₂Wf += D[k, j] * Wf[i, k, vt] end - return Geometry.Covariant12Vector( - ⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W)), - ⊟(RecursiveApply.rdiv(Dᵀ₂Wf, W)), - ) + return Geometry.Covariant12Vector(-Dᵀ₁Wf, -Dᵀ₂Wf) / W end Base.@propagate_inbounds function operator_evaluate( @@ -485,35 +475,29 @@ Base.@propagate_inbounds function operator_evaluate( if length(work) == 2 _, v₂ = work - D₁v₂ = D[i, 1] ⊠ v₂[1, vt] + D₁v₂ = D[i, 1] * v₂[1, vt] for k in 2:Nq - D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, vt] + D₁v₂ += D[i, k] * v₂[k, vt] end - return Geometry.Contravariant3Vector( - RecursiveApply.rmul(D₁v₂, local_geometry.invJ), - ) + result = Geometry.Contravariant3Vector(D₁v₂) elseif length(work) == 1 (v₃,) = work - D₁v₃ = D[i, 1] ⊠ v₃[1, vt] + D₁v₃ = D[i, 1] * v₃[1, vt] for k in 2:Nq - D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, vt] + D₁v₃ += D[i, k] * v₃[k, vt] end - return Geometry.Contravariant2Vector( - ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), - ) + result = Geometry.Contravariant2Vector(-D₁v₃) else _, v₂, v₃ = work - D₁v₂ = D[i, 1] ⊠ v₂[1, vt] - D₁v₃ = D[i, 1] ⊠ v₃[1, vt] + D₁v₂ = D[i, 1] * v₂[1, vt] + D₁v₃ = D[i, 1] * v₃[1, vt] @simd for k in 2:Nq - D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, vt] - D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, vt] + D₁v₂ += D[i, k] * v₂[k, vt] + D₁v₃ += D[i, k] * v₃[k, vt] end - return 
Geometry.Contravariant23Vector( - ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), - RecursiveApply.rmul(D₁v₂, local_geometry.invJ), - ) + result = Geometry.Contravariant23Vector(-D₁v₃, D₁v₂) end + return result * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( op::Curl{(1, 2)}, @@ -533,45 +517,37 @@ Base.@propagate_inbounds function operator_evaluate( if length(work) == 2 v₁, v₂ = work - D₁v₂ = D[i, 1] ⊠ v₂[1, j, vt] - D₂v₁ = D[j, 1] ⊠ v₁[i, 1, vt] + D₁v₂ = D[i, 1] * v₂[1, j, vt] + D₂v₁ = D[j, 1] * v₁[i, 1, vt] for k in 2:Nq - D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, j, vt] - D₂v₁ = D₂v₁ ⊞ D[j, k] ⊠ v₁[i, k, vt] + D₁v₂ += D[i, k] * v₂[k, j, vt] + D₂v₁ += D[j, k] * v₁[i, k, vt] end - return Geometry.Contravariant3Vector( - RecursiveApply.rmul(D₁v₂ ⊟ D₂v₁, local_geometry.invJ), - ) + result = Geometry.Contravariant3Vector(D₁v₂ - D₂v₁) elseif length(work) == 1 (v₃,) = work - D₁v₃ = D[i, 1] ⊠ v₃[1, j, vt] - D₂v₃ = D[j, 1] ⊠ v₃[i, 1, vt] + D₁v₃ = D[i, 1] * v₃[1, j, vt] + D₂v₃ = D[j, 1] * v₃[i, 1, vt] for k in 2:Nq - D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, j, vt] - D₂v₃ = D₂v₃ ⊞ D[j, k] ⊠ v₃[i, k, vt] + D₁v₃ += D[i, k] * v₃[k, j, vt] + D₂v₃ += D[j, k] * v₃[i, k, vt] end - return Geometry.Contravariant12Vector( - RecursiveApply.rmul(D₂v₃, local_geometry.invJ), - ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), - ) + result = Geometry.Contravariant12Vector(D₂v₃, -D₁v₃) else v₁, v₂, v₃ = work - D₁v₂ = D[i, 1] ⊠ v₂[1, j, vt] - D₂v₁ = D[j, 1] ⊠ v₁[i, 1, vt] - D₁v₃ = D[i, 1] ⊠ v₃[1, j, vt] - D₂v₃ = D[j, 1] ⊠ v₃[i, 1, vt] + D₁v₂ = D[i, 1] * v₂[1, j, vt] + D₂v₁ = D[j, 1] * v₁[i, 1, vt] + D₁v₃ = D[i, 1] * v₃[1, j, vt] + D₂v₃ = D[j, 1] * v₃[i, 1, vt] @simd for k in 2:Nq - D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, j, vt] - D₂v₁ = D₂v₁ ⊞ D[j, k] ⊠ v₁[i, k, vt] - D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, j, vt] - D₂v₃ = D₂v₃ ⊞ D[j, k] ⊠ v₃[i, k, vt] + D₁v₂ += D[i, k] * v₂[k, j, vt] + D₂v₁ += D[j, k] * v₁[i, k, vt] + D₁v₃ += D[i, k] * v₃[k, j, vt] + D₂v₃ += D[j, k] * v₃[i, k, vt] end - return 
Geometry.Contravariant123Vector( - RecursiveApply.rmul(D₂v₃, local_geometry.invJ), - ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), - RecursiveApply.rmul(D₁v₂ ⊟ D₂v₁, local_geometry.invJ), - ) + result = Geometry.Contravariant123Vector(D₂v₃, -D₁v₃, D₁v₂ - D₂v₁) end + return result * local_geometry.invJ end Base.@propagate_inbounds function operator_evaluate( @@ -592,35 +568,29 @@ Base.@propagate_inbounds function operator_evaluate( if length(work) == 2 _, Wv₂ = work - Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, vt] + Dᵀ₁Wv₂ = D[1, i] * Wv₂[1, vt] for k in 2:Nq - Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, vt] + Dᵀ₁Wv₂ += D[k, i] * Wv₂[k, vt] end - return Geometry.Contravariant3Vector( - RecursiveApply.rdiv(⊟(Dᵀ₁Wv₂), local_geometry.WJ), - ) + result = Geometry.Contravariant3Vector(-Dᵀ₁Wv₂) elseif length(work) == 1 (Wv₃,) = work - Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, vt] + Dᵀ₁Wv₃ = D[1, i] * Wv₃[1, vt] for k in 2:Nq - Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, vt] + Dᵀ₁Wv₃ += D[k, i] * Wv₃[k, vt] end - return Geometry.Contravariant2Vector( - RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), - ) + result = Geometry.Contravariant2Vector(Dᵀ₁Wv₃) else _, Wv₂, Wv₃ = work - Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, vt] - Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, vt] + Dᵀ₁Wv₂ = D[1, i] * Wv₂[1, vt] + Dᵀ₁Wv₃ = D[1, i] * Wv₃[1, vt] @simd for k in 2:Nq - Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, vt] - Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, vt] + Dᵀ₁Wv₂ += D[k, i] * Wv₂[k, vt] + Dᵀ₁Wv₃ += D[k, i] * Wv₃[k, vt] end - return Geometry.Contravariant23Vector( - RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), - RecursiveApply.rdiv(⊟(Dᵀ₁Wv₂), local_geometry.WJ), - ) + result = Geometry.Contravariant23Vector(Dᵀ₁Wv₃, -Dᵀ₁Wv₂) end + return result / local_geometry.WJ end Base.@propagate_inbounds function operator_evaluate( op::WeakCurl{(1, 2)}, @@ -640,43 +610,35 @@ Base.@propagate_inbounds function operator_evaluate( if length(work) == 2 Wv₁, Wv₂ = work - Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, j, vt] - Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i, 1, vt] + Dᵀ₁Wv₂ = D[1, i] * Wv₂[1, j, vt] + Dᵀ₂Wv₁ = D[1, 
j] * Wv₁[i, 1, vt] for k in 2:Nq - Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, j, vt] - Dᵀ₂Wv₁ = Dᵀ₂Wv₁ ⊞ D[k, j] ⊠ Wv₁[i, k, vt] + Dᵀ₁Wv₂ += D[k, i] * Wv₂[k, j, vt] + Dᵀ₂Wv₁ += D[k, j] * Wv₁[i, k, vt] end - return Geometry.Contravariant3Vector( - RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ), - ) + result = Geometry.Contravariant3Vector(Dᵀ₂Wv₁ - Dᵀ₁Wv₂) elseif length(work) == 1 (Wv₃,) = work - Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, j, vt] - Dᵀ₂Wv₃ = D[1, j] ⊠ Wv₃[i, 1, vt] + Dᵀ₁Wv₃ = D[1, i] * Wv₃[1, j, vt] + Dᵀ₂Wv₃ = D[1, j] * Wv₃[i, 1, vt] for k in 2:Nq - Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, j, vt] - Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊞ D[k, j] ⊠ Wv₃[i, k, vt] + Dᵀ₁Wv₃ += D[k, i] * Wv₃[k, j, vt] + Dᵀ₂Wv₃ += D[k, j] * Wv₃[i, k, vt] end - return Geometry.Contravariant12Vector( - ⊟(RecursiveApply.rdiv(Dᵀ₂Wv₃, local_geometry.WJ)), - RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), - ) + result = Geometry.Contravariant12Vector(-Dᵀ₂Wv₃, Dᵀ₁Wv₃) else Wv₁, Wv₂, Wv₃ = work - Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, j, vt] - Dᵀ₂Wv₁ = D[1, j] ⊠ Wv₁[i, 1, vt] - Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, j, vt] - Dᵀ₂Wv₃ = D[1, j] ⊠ Wv₃[i, 1, vt] + Dᵀ₁Wv₂ = D[1, i] * Wv₂[1, j, vt] + Dᵀ₂Wv₁ = D[1, j] * Wv₁[i, 1, vt] + Dᵀ₁Wv₃ = D[1, i] * Wv₃[1, j, vt] + Dᵀ₂Wv₃ = D[1, j] * Wv₃[i, 1, vt] @simd for k in 2:Nq - Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, j, vt] - Dᵀ₂Wv₁ = Dᵀ₂Wv₁ ⊞ D[k, j] ⊠ Wv₁[i, k, vt] - Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, j, vt] - Dᵀ₂Wv₃ = Dᵀ₂Wv₃ ⊞ D[k, j] ⊠ Wv₃[i, k, vt] + Dᵀ₁Wv₂ += D[k, i] * Wv₂[k, j, vt] + Dᵀ₂Wv₁ += D[k, j] * Wv₁[i, k, vt] + Dᵀ₁Wv₃ += D[k, i] * Wv₃[k, j, vt] + Dᵀ₂Wv₃ += D[k, j] * Wv₃[i, k, vt] end - return Geometry.Contravariant123Vector( - ⊟(RecursiveApply.rdiv(Dᵀ₂Wv₃, local_geometry.WJ)), - RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), - RecursiveApply.rdiv(Dᵀ₂Wv₁ ⊟ Dᵀ₁Wv₂, local_geometry.WJ), - ) + result = Geometry.Contravariant123Vector(-Dᵀ₂Wv₃, Dᵀ₁Wv₃, Dᵀ₂Wv₁ - Dᵀ₁Wv₂) end + return result / local_geometry.WJ end diff --git a/ext/cuda/topologies_dss.jl b/ext/cuda/topologies_dss.jl index 25dd5caa89..c02070e43a 
100644 --- a/ext/cuda/topologies_dss.jl +++ b/ext/cuda/topologies_dss.jl @@ -262,8 +262,7 @@ function dss_transform_kernel!( local_geometry[loc], dss_weights[loc], ) - perimeter_data[CI(p, 1, 1, level, elem)] = - Topologies.drop_vert_dim(eltype(perimeter_data), src) + perimeter_data[CI(p, 1, 1, level, elem)] = src end return nothing end @@ -595,7 +594,7 @@ function Topologies.dss_1d!( nitems = Nv * nfaces threads = _max_threads_cuda() p = linear_partition(nitems, threads) - args = (data, local_geometry, dss_weights, nfaces) + args = (Base.broadcastable(data), local_geometry, dss_weights, nfaces) auto_launch!( dss_1d_kernel!, args; @@ -621,7 +620,7 @@ function dss_1d_kernel!(data, local_geometry, dss_weights, nfaces) local_geometry, dss_weights, left_idx, - ) ⊞ Topologies.dss_transform( + ) + Topologies.dss_transform( data, local_geometry, dss_weights, diff --git a/src/ClimaCore.jl b/src/ClimaCore.jl index 01dd82287f..761dfbc782 100644 --- a/src/ClimaCore.jl +++ b/src/ClimaCore.jl @@ -8,7 +8,7 @@ include("DebugOnly/DebugOnly.jl") include("Utilities/Utilities.jl") include("interface.jl") include("devices.jl") -include("RecursiveApply/RecursiveApply.jl") +include("recursive_apply.jl") include("DataLayouts/DataLayouts.jl") include("Geometry/Geometry.jl") include("Domains/Domains.jl") diff --git a/src/DataLayouts/DataLayouts.jl b/src/DataLayouts/DataLayouts.jl index 81f07429fb..807e40a73e 100644 --- a/src/DataLayouts/DataLayouts.jl +++ b/src/DataLayouts/DataLayouts.jl @@ -73,8 +73,9 @@ using UnrolledUtilities import ..Utilities.Unrolled: unrolled_setindex, unrolled_insert, unrolled_map_with_inbounds -import ..Utilities: - PlusHalf, unionall_type, replace_type_parameter, fieldtype_vals +import ..Utilities: PlusHalf, unionall_type, replace_type_parameter +import ..Utilities: fieldtype_vals, safe_eltype, unsafe_eltype, auto_broadcasted +import ..Utilities: add_auto_broadcasters, drop_auto_broadcasters import ..DebugOnly: call_post_op_callback, post_op_callback import 
..slab, ..slab_args, ..column, ..column_args, ..level, ..level_args export slab, @@ -1616,6 +1617,10 @@ rebuild(data::AbstractData, ::Type{DA}) where {DA} = Base.copy(data::AbstractData) = union_all(singleton(data)){type_params(data)...}(copy(parent(data))) +Base.reinterpret(::Type{S}, data::AbstractData{S}) where {S} = data +Base.reinterpret(::Type{S}, data::AbstractData) where {S} = + union_all(singleton(data)){S, type_params(data)[2:end]...}(parent(data)) + # broadcast machinery include("non_extruded_broadcasted.jl") include("broadcast.jl") diff --git a/src/DataLayouts/broadcast.jl b/src/DataLayouts/broadcast.jl index a43f205148..bdbb2ac01e 100644 --- a/src/DataLayouts/broadcast.jl +++ b/src/DataLayouts/broadcast.jl @@ -338,7 +338,22 @@ Base.Broadcast.BroadcastStyle( ) where {Nv, Nij, A1, A2} = VIJHFStyle{Nv, Nij, promote_parent_array_type(A1, A2)}() -Base.Broadcast.broadcastable(data::AbstractData) = data +# Enable automatic nested broadcasting over supported types of iterators, in +# addition to the standard broadcasting over array indices. +Base.Broadcast.broadcastable(data::AbstractData) = + reinterpret(add_auto_broadcasters(eltype(data)), data) +Base.Broadcast.broadcasted(style::DataStyle, f::F, args...) where {F} = + auto_broadcasted(style, f, args) + +Base.eltype(bc::Base.Broadcast.Broadcasted{<:DataStyle}) = unsafe_eltype(bc) + +# Remove all AutoBroadcaster wrappers when allocating a new AbstractData. +Base.similar(bc::Base.Broadcast.Broadcasted{<:DataStyle}) = + similar(bc, drop_auto_broadcasters(safe_eltype(bc))) + +# Only allocate a new AbstractData if its concrete element type can be inferred. 
+Base.copy(bc::Base.Broadcast.Broadcasted{<:DataStyle}) = + copyto!(similar(bc), bc) Base.@propagate_inbounds function slab( bc::Base.Broadcast.Broadcasted{DS}, diff --git a/src/DataLayouts/mapreduce.jl b/src/DataLayouts/mapreduce.jl index 648a23aa76..111ef558c2 100644 --- a/src/DataLayouts/mapreduce.jl +++ b/src/DataLayouts/mapreduce.jl @@ -1,18 +1,22 @@ # This is only defined for testing. function mapreduce_cuda end -function Base.mapreduce( +Base.mapreduce( + fn::F, + op::Op, + data::Union{AbstractData, Base.Broadcast.Broadcasted{<:DataStyle}}, +) where {F, Op} = + drop_auto_broadcasters(mapreduce_data(fn, op, Base.broadcastable(data))) + +function mapreduce_data( fn::F, op::Op, bc::BroadcastedUnionDataF{<:Any, A}, ) where {F, Op, A} - mapreduce(op, 1) do v - Base.@_inline_meta - @inbounds fn(bc[]) - end + @inbounds fn(bc[]) end -function Base.mapreduce( +function mapreduce_data( fn::F, op::Op, bc::Union{ @@ -25,11 +29,11 @@ function Base.mapreduce( mapreduce(op, 1:Nh) do h Base.@_inline_meta slabview = @inbounds slab(bc, h) - mapreduce(fn, op, slabview) + mapreduce_data(fn, op, slabview) end end -function Base.mapreduce( +function mapreduce_data( fn::F, op::Op, bc::Union{ @@ -42,11 +46,15 @@ function Base.mapreduce( mapreduce(op, 1:Nh) do h Base.@_inline_meta slabview = @inbounds slab(bc, h) - mapreduce(fn, op, slabview) + mapreduce_data(fn, op, slabview) end end -function Base.mapreduce(fn::F, op::Op, bc::IJF{S, Nij}) where {F, Op, S, Nij} +function mapreduce_data( + fn::F, + op::Op, + bc::BroadcastedUnionIJF{<:Any, Nij, A}, +) where {F, Op, Nij, A} # mapreduce across DataSlab2D nodes mapreduce(op, Iterators.product(1:Nij, 1:Nij)) do (i, j) Base.@_inline_meta @@ -56,7 +64,11 @@ function Base.mapreduce(fn::F, op::Op, bc::IJF{S, Nij}) where {F, Op, S, Nij} end end -function Base.mapreduce(fn::F, op::Op, bc::IF{S, Ni}) where {F, Op, S, Ni} +function mapreduce_data( + fn::F, + op::Op, + bc::BroadcastedUnionIF{<:Any, Ni, A}, +) where {F, Op, Ni, A} # mapreduce 
across DataSlab1D nodes mapreduce(op, 1:Ni) do i Base.@_inline_meta @@ -66,7 +78,7 @@ function Base.mapreduce(fn::F, op::Op, bc::IF{S, Ni}) where {F, Op, S, Ni} end end -function Base.mapreduce( +function mapreduce_data( fn::F, op::Op, bc::BroadcastedUnionVF{<:Any, Nv, A}, @@ -80,7 +92,7 @@ function Base.mapreduce( end end -function Base.mapreduce( +function mapreduce_data( fn::F, op::Op, bc::Union{ @@ -93,11 +105,11 @@ function Base.mapreduce( mapreduce(op, Iterators.product(1:Ni, 1:Nh)) do (i, h) Base.@_inline_meta columnview = @inbounds column(bc, i, h) - mapreduce(fn, op, columnview) + mapreduce_data(fn, op, columnview) end end -function Base.mapreduce( +function mapreduce_data( fn::F, op::Op, bc::Union{ @@ -110,6 +122,6 @@ function Base.mapreduce( mapreduce(op, Iterators.product(1:Nij, 1:Nij, 1:Nh)) do (i, j, h) Base.@_inline_meta columnview = @inbounds column(bc, i, j, h) - mapreduce(fn, op, columnview) + mapreduce_data(fn, op, columnview) end end diff --git a/src/Fields/Fields.jl b/src/Fields/Fields.jl index 6f2b6759a8..efeb824844 100644 --- a/src/Fields/Fields.jl +++ b/src/Fields/Fields.jl @@ -25,14 +25,14 @@ import ..Spaces: nlevels, ncolumns import ..Spaces: get_mask, set_mask! 
import ..DataLayouts: AbstractMask import ..Geometry: Geometry, Cartesian12Vector -import ..Utilities: PlusHalf, half +import ..Utilities: PlusHalf, half, safe_eltype, unsafe_eltype +import ..Utilities: drop_auto_broadcasters, auto_broadcasted -using ..RecursiveApply +using UnrolledUtilities using ClimaComms import Adapt -import UnrolledUtilities: unrolled_map, unrolled_mapreduce, unrolled_findfirst, unrolled_all -import StaticArrays, LinearAlgebra, Statistics, InteractiveUtils +import StaticArrays, LinearAlgebra, Statistics """ Field(values, space) diff --git a/src/Fields/broadcast.jl b/src/Fields/broadcast.jl index 011c425510..5ae30ac6b1 100644 --- a/src/Fields/broadcast.jl +++ b/src/Fields/broadcast.jl @@ -43,6 +43,23 @@ Base.Broadcast.BroadcastStyle( ::FieldStyle{DS2}, ) where {DS1, DS2} = FieldStyle(Base.Broadcast.BroadcastStyle(DS1(), DS2())) +""" + FieldConflict + +Analogue of the built-in `Broadcast.ArrayConflict` for Fields. Used in place of +`Broadcast.Unknown` to call `Broadcast.broadcasted(::AbstractFieldStyle, ...)`. +Without this broadcast style, such `broadcasted` methods would need more complex +definitions that specialize on argument types, rather than just the style type. +""" +struct FieldConflict <: AbstractFieldStyle end + +Base.Broadcast.result_join( + ::AbstractFieldStyle, + ::AbstractFieldStyle, + ::Base.Broadcast.Unknown, + ::Base.Broadcast.Unknown, +) = FieldConflict() + # Override the recursive unrolling used in combine_styles (which can lead to # inference failures in broadcast expressions with more than 10 arguments) with # manual unrolling (which can have higher latency but is always inferrable). 
@@ -57,67 +74,22 @@ Base.Broadcast.combine_styles( (arg1, arg2, arg3, args...), ) -Base.Broadcast.broadcastable(field::Field) = field +# Define broadcastable/broadcasted/eltype/similar/copy to match DataStyle +# broadcasting, but with the application of a mask when copying +Base.Broadcast.broadcastable(field::Field) = + Field(Base.Broadcast.broadcastable(field_values(field)), axes(field)) + +Base.Broadcast.broadcasted(style::AbstractFieldStyle, f::F, args...) where {F} = + auto_broadcasted(style, f, args) Base.eltype(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = - Base.Broadcast.combine_eltypes(bc.f, bc.args) - -# _first: recursively get the first element -function _first end - -# If we haven't caught the datatype, then this -# may just result in a method error-- but all -# we're trying to do is throw a more helpful -# error message. So, let's throw it here instead. -_first(bc, ::Any) = throw(BroadcastInferenceError(bc)) -_first_data_layout(data::DataLayouts.VF) = data[CartesianIndex(1, 1, 1, 1, 1)] -_first_data_layout(data::DataLayouts.DataF) = data[] -_first(bc, x::Real) = x -_first(bc, x::Geometry.LocalGeometry) = x -_first(bc, data::DataLayouts.VF) = data[] -_first(bc, field::Field) = - _first_data_layout(field_values(column(field, 1, 1, 1))) -_first(bc, space::Spaces.AbstractSpace) = - _first_data_layout(field_values(column(space, 1, 1, 1))) -_first(bc, x::Base.Broadcast.Broadcasted) = _first(bc, copy(x)) -_first(bc, x::Ref{T}) where {T} = x.x -_first(bc, x::Tuple{T}) where {T} = x[1] - -function call_with_first(bc) - # Try calling with first applied to all arguments: - bc′ = Base.Broadcast.preprocess(nothing, bc) - first_args = map(arg -> _first(bc, arg), bc′.args) - bc.f(first_args...) 
-end - -# we implement our own to avoid the type-widening code, and throw a more useful error -struct BroadcastInferenceError <: Exception - bc::Base.Broadcast.Broadcasted -end - -function Base.showerror(io::IO, err::BroadcastInferenceError) - print(io, "BroadcastInferenceError: cannot infer eltype.\n") - bc = err.bc - f = bc.f - eltypes = map(eltype, bc.args) - if !hasmethod(f, eltypes) - print(io, " function $(f) does not have a method for $(eltypes)") - else - InteractiveUtils.code_warntype(io, f, eltypes) - end -end + unsafe_eltype(bc) -function Base.copy( - bc::Base.Broadcast.Broadcasted{Style}, -) where {Style <: AbstractFieldStyle} - ElType = eltype(bc) - if !Base.isconcretetype(ElType) - call_with_first(bc) - throw(BroadcastInferenceError(bc)) - end - # We can trust it and defer to the simpler `copyto!` - return copyto!(similar(bc, ElType), bc, Spaces.get_mask(axes(bc))) -end +Base.similar(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = + similar(bc, drop_auto_broadcasters(safe_eltype(bc))) + +Base.copy(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = + copyto!(similar(bc), bc, Spaces.get_mask(axes(bc))) Base.@propagate_inbounds function slab( bc::Base.Broadcast.Broadcasted{Style}, @@ -216,15 +188,15 @@ Base.axes(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = _axes(bc, ::Nothing) = Base.Broadcast.combine_axes(bc.args...) 
_axes(bc, axes) = axes -function Base.similar( +Base.similar( bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}, ::Type{Eltype}, -) where {Eltype} - return Field(similar(todata(bc), Eltype), axes(bc)) -end +) where {Eltype} = Field(Eltype, axes(bc)) -Base.similar(bc::Base.Broadcast.Broadcasted{<:AbstractFieldStyle}) = - Base.similar(bc, eltype(bc)) +Base.similar( + bc::Base.Broadcast.Broadcasted{<:FieldStyle}, + ::Type{Eltype}, +) where {Eltype} = Field(similar(todata(bc), Eltype), axes(bc)) @inline function Base.copyto!( dest::Field, @@ -360,6 +332,7 @@ end return nothing end +# By default, broadcasted Vals are put in Refs, leading to type instabilities Base.Broadcast.broadcasted( ::typeof(Base.literal_pow), ::typeof(^), @@ -367,27 +340,6 @@ Base.Broadcast.broadcasted( ::Val{n}, ) where {n} = Base.Broadcast.broadcasted(x -> Base.literal_pow(^, x, Val(n)), f) -# Specialize handling of +, *, muladd, so that we can support broadcasting over NamedTuple element types -# Required for ODE solvers - -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(+), args...) = - Base.Broadcast.broadcasted(fs, RecursiveApply.:⊞, args...) - -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(-), args...) = - Base.Broadcast.broadcasted(fs, RecursiveApply.:⊟, args...) - -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(*), args...) = - Base.Broadcast.broadcasted(fs, RecursiveApply.:⊠, args...) - -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(/), args...) = - Base.Broadcast.broadcasted(fs, RecursiveApply.rdiv, args...) - -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(muladd), args...) = - Base.Broadcast.broadcasted(fs, RecursiveApply.rmuladd, args...) 
- -Base.Broadcast.broadcasted(fs::AbstractFieldStyle, ::typeof(zero), arg) = - Base.Broadcast.broadcasted(fs, RecursiveApply.rzero, arg) - # Specialize handling of vector-based functions to automatically add LocalGeometry information function Base.Broadcast.broadcasted( fs::AbstractFieldStyle, diff --git a/src/Fields/mapreduce.jl b/src/Fields/mapreduce.jl index 1197d5902e..0ad71e4ac1 100644 --- a/src/Fields/mapreduce.jl +++ b/src/Fields/mapreduce.jl @@ -1,4 +1,5 @@ -Base.map(fn, fields::Field...) = Base.broadcast(fn, fields...) +Base.map(fn, field::Field, fields::Field...) = + Base.broadcast(fn, field, fields...) Base.map!(fn, dest::Field, fields::Field...) = Base.broadcast!(fn, dest, fields...) @@ -14,10 +15,9 @@ function local_sum( field::Union{Field, Base.Broadcast.Broadcasted{<:FieldStyle}}, dev::ClimaComms.AbstractCPUDevice, ) - result = Base.reduce( - RecursiveApply.radd, + result = Base.sum( Base.Broadcast.broadcasted( - RecursiveApply.rmul, + *, Spaces.weighted_jacobian(axes(field)), todata(field), ), @@ -122,7 +122,7 @@ function Statistics.mean( DataLayouts.DataF((local_sum(field), Spaces.local_area(space))) ClimaComms.allreduce!(context, parent(data_combined), +) sum_v, area_v = data_combined[] - RecursiveApply.rdiv(sum_v, area_v) + return sum_v ./ area_v end Statistics.mean(fn, field::Field, ::ClimaComms.AbstractCPUDevice) = Statistics.mean(Base.Broadcast.broadcasted(fn, field)) diff --git a/src/Geometry/Geometry.jl b/src/Geometry/Geometry.jl index 453f676f27..27238c3385 100644 --- a/src/Geometry/Geometry.jl +++ b/src/Geometry/Geometry.jl @@ -1,6 +1,5 @@ module Geometry -using ..RecursiveApply import LinearAlgebra import UnrolledUtilities: unrolled_findfirst @@ -22,7 +21,8 @@ include("axistensors.jl") include("localgeometry.jl") include("conversions.jl") include("globalgeometry.jl") -include("rmul_with_projection.jl") +include("mul_with_projection.jl") +include("auto_broadcaster_methods.jl") """ Δz_metric_component(::Type{<:AbstractPoint}) diff --git 
a/src/Geometry/auto_broadcaster_methods.jl b/src/Geometry/auto_broadcaster_methods.jl new file mode 100644 index 0000000000..d75856a7d5 --- /dev/null +++ b/src/Geometry/auto_broadcaster_methods.jl @@ -0,0 +1,48 @@ +import ..Utilities: + AutoBroadcaster, nested_broadcast, nested_broadcast_result_type + +# TODO: Avoid defining these methods by refactoring the Geometry module so that +# all relevant functionality is expressed in terms of standard math operations + +(::Type{T})(x::AutoBroadcaster) where {T <: AxisTensor} = nested_broadcast(T, x) + +for f in (:covariant, :contravariant), n in (1, 2, 3) + @eval $(Symbol(f, n))(x::AutoBroadcaster, lg) = + nested_broadcast(Base.Fix2($(Symbol(f, n)), lg), x) +end +Jcontravariant3(x::AutoBroadcaster, lg) = + nested_broadcast(Base.Fix2(Jcontravariant3, lg), x) + +mul_with_projection(x::AutoBroadcaster, y::AutoBroadcaster, lg) = + nested_broadcast((x, y) -> mul_with_projection(x, y, lg), x, y) +mul_with_projection(x::AutoBroadcaster, y, lg) = + nested_broadcast(x -> mul_with_projection(x, y, lg), x) +mul_with_projection(x, y::AutoBroadcaster, lg) = + nested_broadcast(y -> mul_with_projection(x, y, lg), y) + +needs_projection( + ::Type{X}, + ::Type{Y}, +) where {X <: AutoBroadcaster, Y <: AutoBroadcaster} = + needs_projection(eltype(X), eltype(Y)) +needs_projection(::Type{X}, ::Type{Y}) where {X <: AutoBroadcaster, Y} = + needs_projection(eltype(X), Y) +needs_projection(::Type{X}, ::Type{Y}) where {X, Y <: AutoBroadcaster} = + needs_projection(X, eltype(Y)) + +mul_return_type( + ::Type{X}, + ::Type{Y}, +) where {X <: AutoBroadcaster, Y <: AutoBroadcaster} = + nested_broadcast_result_type(mul_return_type, X, Y) +mul_return_type(::Type{X}, ::Type{Y}) where {X <: AutoBroadcaster, Y} = + nested_broadcast_result_type(Base.Fix2(mul_return_type, Y), X) +mul_return_type(::Type{X}, ::Type{Y}) where {X, Y <: AutoBroadcaster} = + nested_broadcast_result_type(Base.Fix1(mul_return_type, X), Y) + +divergence_result_type(::Type{X}) where {X 
<: AutoBroadcaster} = + nested_broadcast_result_type(divergence_result_type, X) +gradient_result_type(val, ::Type{X}) where {X <: AutoBroadcaster} = + nested_broadcast_result_type(Base.Fix1(gradient_result_type, val), X) +curl_result_type(val, ::Type{X}) where {X <: AutoBroadcaster} = + nested_broadcast_result_type(Base.Fix1(curl_result_type, val), X) diff --git a/src/Geometry/axistensors.jl b/src/Geometry/axistensors.jl index 32f051af2d..c072d555c5 100644 --- a/src/Geometry/axistensors.jl +++ b/src/Geometry/axistensors.jl @@ -519,11 +519,9 @@ end end """ - outer(x, y) x ⊗ y -Compute the outer product of `x` and `y`. Typically `x` will be a vector, and -`y` can be either a number, vector or tuple/named tuple. +Shorthand for the outer product `x * y'`. ```julia # vector ⊗ scalar = vector @@ -537,21 +535,7 @@ julia> [1.0,2.0] ⊗ [1.0,3.0] 2×2 Matrix{Float64}: 1.0 3.0 2.0 6.0 - -# vector ⊗ tuple = recursion -julia> [1.0,2.0] ⊗ (1.0, (a=2.0, b=3.0)) -([1.0, 2.0], (a = [2.0, 4.0], b = [3.0, 6.0])) ``` """ -function outer end -const ⊗ = outer - -@inline function outer(x::AbstractVector, y::AbstractVector) - x * y' -end -@inline function outer(x::AbstractVector, y::Number) - x * y -end -@inline function outer(x::AbstractVector, y) - RecursiveApply.rmap(y -> x ⊗ y, y) -end +⊗(x, y) = x * y' +const outer = ⊗ # For backwards compatibility with previous versions of ClimaCore diff --git a/src/Geometry/rmul_with_projection.jl b/src/Geometry/mul_with_projection.jl similarity index 63% rename from src/Geometry/rmul_with_projection.jl rename to src/Geometry/mul_with_projection.jl index 108f41a751..e50985b370 100644 --- a/src/Geometry/rmul_with_projection.jl +++ b/src/Geometry/mul_with_projection.jl @@ -1,7 +1,5 @@ import LinearAlgebra: Adjoint, AdjointAbsVec -import .RecursiveApply: rmap, rmaptype -# import LinearAlgebra: I, UniformScaling, Adjoint, AdjointAbsVec -# Types that are treated as single values when using matrix fields. 
+ const SingleValue = Union{Number, AxisTensor, AdjointAxisTensor} """ @@ -17,17 +15,6 @@ mul_with_projection(x, y, _) = x * y mul_with_projection(x::Union{AdjointAxisVector, Axis2TensorOrAdj}, y::AxisTensor, lg) = x * project(dual(axes(x)[2]), y, lg) -""" - rmul_with_projection(x, y, lg) - -Similar to `rmul(x, y)`, except that this version calls `mul_with_projection` -instead of `*`. -""" -rmul_with_projection(x, y, lg) = rmap((x′, y′) -> mul_with_projection(x′, y′, lg), x, y) -rmul_with_projection(x::SingleValue, y, lg) = rmap(y′ -> mul_with_projection(x, y′, lg), y) -rmul_with_projection(x, y::SingleValue, lg) = rmap(x′ -> mul_with_projection(x′, y, lg), x) -rmul_with_projection(x::SingleValue, y::SingleValue, lg) = mul_with_projection(x, y, lg) - axis_tensor_type(::Type{T}, ::Type{Tuple{A1}}) where {T, A1} = AxisVector{T, A1, SVector{_length(A1), T}} function axis_tensor_type(::Type{T}, ::Type{Tuple{A1, A2}}) where {T, A1, A2} @@ -48,37 +35,15 @@ axis2(::Type{<:AdjointAxis2Tensor{<:Any, <:Tuple{A, Any}}}) where {A} = A """ needs_projection(::Type{X}, ::Type{Y}) -Returns `true` if multiplying an object of type `X` with an object of type `Y` would require -projection. This always returns false if `X` or `Y` are a `Tuple` or `NamedTuple` with -eltype any. +Returns `true` if multiplying an object of type `X` with an object of type `Y` +would require projection. 
""" -needs_projection(::Type{X}, ::Type{Y}) where {X <: Number, Y <: SingleValue} = false -needs_projection(::Type{X}, ::Type{Y}) where {X <: SingleValue, Y <: SingleValue} = false -function needs_projection(::Type{X}, ::Type{Y}) where {X, Y} - (eltype(X) === Any || eltype(Y) === Any) && return false - needs_projection(eltype(X), eltype(Y)) -end +needs_projection(::Type{X}, ::Type{Y}) where {X, Y} = false needs_projection( ::Type{X}, ::Type{Y}, ) where {X <: Union{AdjointAxisVector, Axis2TensorOrAdj}, Y <: AxisTensor} = axes(X)[2] != Geometry.dual(axes(Y)[1]) -function needs_projection( - ::Type{X}, - ::Type{Y}, -) where {X <: SingleValue, Y <: Union{Tuple, NamedTuple}} - X <: Number && return false - eltype(Y) === Any && return false - needs_projection(X, eltype(Y)) -end -function needs_projection( - ::Type{X}, - ::Type{Y}, -) where {X <: Union{Tuple, NamedTuple}, Y <: SingleValue} - Y <: Number && return false - eltype(X) === Any && return false - needs_projection(eltype(X), Y) -end recursively_find_dual_axes_for_projection( ::Type{X}, @@ -147,22 +112,3 @@ mul_return_type( ::Type{X}, ::Type{Y}, ) where {T1, T2, X <: Axis2TensorOrAdj{T1}, Y <: Axis2TensorOrAdj{T2}} = axis_tensor_type(promote_type(T1, T2), Tuple{axis1(X), axis2(Y)}) - -""" - rmul_return_type(X, Y) - -Computes the return type of `rmul_with_projection(x, y, lg)`, where `x isa X` -and `y isa Y`. This can also be used to obtain the return type of `rmul(x, y)`, -although `rmul(x, y)` will throw an error when projection is necessary. - -Note that this is equivalent to calling the internal function `_return_type`: -`Base._return_type(rmul_with_projection, Tuple{X, Y, LG})`, where `lg isa LG`. 
-""" -rmul_return_type(::Type{X}, ::Type{Y}) where {X, Y} = - rmaptype((X′, Y′) -> mul_return_type(X′, Y′), X, Y) -rmul_return_type(::Type{X}, ::Type{Y}) where {X <: SingleValue, Y} = - rmaptype(Y′ -> mul_return_type(X, Y′), Y) -rmul_return_type(::Type{X}, ::Type{Y}) where {X, Y <: SingleValue} = - rmaptype(X′ -> mul_return_type(X′, Y), X) -rmul_return_type(::Type{X}, ::Type{Y}) where {X <: SingleValue, Y <: SingleValue} = - mul_return_type(X, Y) diff --git a/src/Limiters/Limiters.jl b/src/Limiters/Limiters.jl index 9859330fa3..6eef103a20 100644 --- a/src/Limiters/Limiters.jl +++ b/src/Limiters/Limiters.jl @@ -1,7 +1,6 @@ module Limiters import ..DataLayouts, ..Topologies, ..Spaces, ..Fields -import ..RecursiveApply: rdiv, rmin, rmax import ..DebugOnly: call_post_op_callback, post_op_callback import ClimaCore: slab diff --git a/src/Limiters/quasimonotone.jl b/src/Limiters/quasimonotone.jl index 35fba4fd92..6acc3d8524 100644 --- a/src/Limiters/quasimonotone.jl +++ b/src/Limiters/quasimonotone.jl @@ -1,6 +1,5 @@ import ClimaComms import ..Operators -import ..RecursiveApply: ⊠, ⊞, ⊟, rmap, rzero, rdiv import ..DataLayouts: slab_index import Adapt @@ -186,8 +185,8 @@ function compute_element_bounds!( ρ, dev::ClimaComms.AbstractCPUDevice, ) - ρ_data = Fields.field_values(ρ) - ρq_data = Fields.field_values(ρq) + ρ_data = Base.broadcastable(Fields.field_values(ρ)) + ρq_data = Base.broadcastable(Fields.field_values(ρq)) q_bounds = limiter.q_bounds (Ni, Nj, _, Nv, Nh) = size(ρq_data) for h in 1:Nh @@ -197,16 +196,13 @@ function compute_element_bounds!( local q_min, q_max for j in 1:Nj for i in 1:Ni - q = rdiv( - slab_ρq[slab_index(i, j)], - slab_ρ[slab_index(i, j)], - ) + q = slab_ρq[slab_index(i, j)] / slab_ρ[slab_index(i, j)] if i == 1 && j == 1 q_min = q q_max = q else - q_min = rmin(q_min, q) - q_max = rmax(q_max, q) + q_min = min(q_min, q) + q_max = max(q_max, q) end end end @@ -237,7 +233,7 @@ function compute_neighbor_bounds_local!( dev::ClimaComms.AbstractCPUDevice, 
) topology = Spaces.topology(axes(ρ)) - q_bounds = limiter.q_bounds + q_bounds = Base.broadcastable(limiter.q_bounds) q_bounds_nbr = limiter.q_bounds_nbr (_, _, _, Nv, Nh) = size(q_bounds_nbr) for h in 1:Nh @@ -247,8 +243,8 @@ function compute_neighbor_bounds_local!( q_max = slab_q_bounds[slab_index(2)] for h_nbr in Topologies.local_neighboring_elements(topology, h) slab_q_bounds = slab(q_bounds, v, h_nbr) - q_min = rmin(q_min, slab_q_bounds[slab_index(1)]) - q_max = rmax(q_max, slab_q_bounds[slab_index(2)]) + q_min = min(q_min, slab_q_bounds[slab_index(1)]) + q_max = max(q_max, slab_q_bounds[slab_index(2)]) end slab_q_bounds_nbr = slab(q_bounds_nbr, v, h) slab_q_bounds_nbr[slab_index(1)] = q_min @@ -275,8 +271,7 @@ function compute_neighbor_bounds_ghost!( q_bounds_nbr = limiter.q_bounds_nbr (_, _, _, Nv, Nh) = size(q_bounds_nbr) if limiter.ghost_buffer isa Topologies.GhostBuffer - q_bounds_ghost = limiter.ghost_buffer.recv_data - + q_bounds_ghost = Base.broadcastable(limiter.ghost_buffer.recv_data) for h in 1:Nh for v in 1:Nv slab_q_bounds = slab(q_bounds_nbr, v, h) @@ -284,8 +279,8 @@ function compute_neighbor_bounds_ghost!( q_max = slab_q_bounds[slab_index(2)] for gidx in Topologies.ghost_neighboring_elements(topology, h) ghost_slab_q_bounds = slab(q_bounds_ghost, v, gidx) - q_min = rmin(q_min, ghost_slab_q_bounds[slab_index(1)]) - q_max = rmax(q_max, ghost_slab_q_bounds[slab_index(2)]) + q_min = min(q_min, ghost_slab_q_bounds[slab_index(1)]) + q_max = max(q_max, ghost_slab_q_bounds[slab_index(2)]) end slab_q_bounds_nbr = slab(q_bounds_nbr, v, h) slab_q_bounds_nbr[slab_index(1)] = q_min diff --git a/src/MatrixFields/MatrixFields.jl b/src/MatrixFields/MatrixFields.jl index 5be04a597c..69d1d006f1 100644 --- a/src/MatrixFields/MatrixFields.jl +++ b/src/MatrixFields/MatrixFields.jl @@ -16,9 +16,9 @@ for them: - Matrix-matrix multiplication, e.g., `@. matrix_field1 * matrix_field2` - Compatibility with `LinearAlgebra.I`, e.g., `@. matrix_field = (4I,)` or `@. 
matrix_field - (4I,)` -- Integration with `RecursiveApply`, e.g., the entries of `matrix_field` can be - `Tuple`s or `NamedTuple`s instead of single values, which allows - `matrix_field` to represent multiple band matrices at the same time +- Compatibility with generic data types, e.g., the entries of `matrix_field` can + be iterators instead of single values, which allows `matrix_field` to + represent multiple band matrices at the same time - Integration with `Operators`, e.g., the `matrix_field` that gets applied to the argument of any `FiniteDifferenceOperator` `op` can be obtained using the `FiniteDifferenceOperator` `operator_matrix(op)` @@ -35,9 +35,6 @@ multiples of `LinearAlgebra.I`. This comes with the following functionality: - Addition and subtraction, e.g., `@. field_matrix1 + field_matrix2` - Matrix-vector multiplication, e.g., `@. field_matrix * field_vector` - Matrix-matrix multiplication, e.g., `@. field_matrix1 * field_matrix2` -- Integration with `RecursiveApply`, e.g., the entries of `field_matrix` can be - specified either as matrix `Field`s of `Tuple`s or `NamedTuple`s, or as - separate matrix `Field`s of single values - The ability to solve linear equations using `FieldMatrixSolver`, which is a generalization of `ldiv!` that is designed to optimize solver performance """ @@ -54,10 +51,10 @@ import NVTX import Adapt using UnrolledUtilities -import ..Utilities: PlusHalf, half -import ..RecursiveApply: - rmap, rmaptype, rpromote_type, rzero, rconvert, radd, rsub, rmul, rdiv -import ..RecursiveApply: ⊠, ⊞, ⊟ +import ..RecursiveApply: rzero +import ..Utilities: PlusHalf, half, new +import ..Utilities: AutoBroadcaster, is_auto_broadcastable, auto_broadcasted +import ..Utilities: add_auto_broadcasters, drop_auto_broadcasters import ..DataLayouts import ..DataLayouts: AbstractData import ..DataLayouts: vindex @@ -67,11 +64,7 @@ import ..Spaces import ..Spaces: local_geometry_type import ..Fields import ..Operators -using ..Geometry: - 
rmul_with_projection, - mul_with_projection, - axis_tensor_type, - rmul_return_type +using ..Geometry: mul_with_projection, mul_return_type, axis_tensor_type export DiagonalMatrixRow, BidiagonalMatrixRow, @@ -104,41 +97,18 @@ include("field_matrix_solver.jl") include("field_matrix_iterative_solver.jl") include("field_matrix_with_solver.jl") -const FieldOrStencilStyleType = Union{ - Fields.Field, - Base.Broadcast.Broadcasted{<:Fields.AbstractFieldStyle}, - Operators.StencilBroadcasted, - LazyOperatorBroadcasted, -} - -function Base.Broadcast.broadcasted( - ::typeof(*), - field_or_broadcasted::FieldOrStencilStyleType, - args..., -) +# Evaluate multiplications in left-associative order. This should technically be +# right-associative, but flipping the order worsens performance in GPU kernels, +# where the second argument of each matrix product is cached. Left-associativity +# makes the first argument grow in bandwidth when multiplying a chain of +# matrices, whereas right-associativity makes the second argument grow instead. +Base.broadcasted(::Fields.AbstractFieldStyle, ::typeof(*), arg, args...) = + unrolled_reduce((x, y) -> Base.broadcasted(*, x, y), args; init = arg) - unrolled_reduce(args; init = field_or_broadcasted) do arg1, arg2 - arg1_isa_matrix = - arg1 isa LazyOperatorBroadcasted && length(arg1.args) > 0 ? - eltype(arg1.args[1]) <: BandMatrixRow || - arg1.args[1] isa LazyOperatorBroadcasted : - eltype(arg1) <: BandMatrixRow || arg1 isa LazyOperatorBroadcasted - use_matrix_mul_op = arg1_isa_matrix && arg2 isa FieldOrStencilStyleType - op = use_matrix_mul_op ? 
MultiplyColumnwiseBandMatrixField() : ⊠ - Base.Broadcast.broadcasted(op, arg1, arg2) - end -end -Base.Broadcast.broadcasted( - ::typeof(*), - single_value_or_broadcasted::SingleValueStyleType, - field_or_broadcasted::FieldOrStencilStyleType, - args..., -) = Base.Broadcast.broadcasted( - ⊠, - single_value_or_broadcasted, - Base.Broadcast.broadcasted(*, field_or_broadcasted, args...), -) -# TODO: Generalize this to handle, e.g., @. scalar * scalar * matrix * matrix. +Base.broadcasted(style::Fields.AbstractFieldStyle, ::typeof(*), x, y) = + check_entry(FieldNamePair, x) && check_entry(FieldName, y) ? + Base.broadcasted(MultiplyColumnwiseBandMatrixField(), x, y) : + auto_broadcasted(style, *, (x, y)) function Base.show(io::IO, field::ColumnwiseBandMatrixField) print(io, eltype(field), "-valued Field") diff --git a/src/MatrixFields/band_matrix_row.jl b/src/MatrixFields/band_matrix_row.jl index 0b795fc091..d8d4b727e8 100644 --- a/src/MatrixFields/band_matrix_row.jl +++ b/src/MatrixFields/band_matrix_row.jl @@ -16,14 +16,12 @@ several aliases for commonly used subtypes of `BandMatrixRow`: struct BandMatrixRow{ld, bw, T} # bw is the bandwidth (the number of diagonals) entries::NTuple{bw, T} BandMatrixRow{ld, bw, T}(entries::NTuple{bw, Any}) where {ld, bw, T} = - new{ld, bw, T}(rconvert(NTuple{bw, T}, entries)) - # TODO: Remove this inner constructor once Julia's default convert function - # is type-stable for nested Tuple/NamedTuple types. + new{ld, bw, T}(entries) end BandMatrixRow{ld}(entries::Vararg{Any, bw}) where {ld, bw} = BandMatrixRow{ld, bw}(entries...) BandMatrixRow{ld, bw}(entries::Vararg{Any, bw}) where {ld, bw} = - BandMatrixRow{ld, bw, rpromote_type(map(typeof, entries)...)}(entries) + BandMatrixRow{ld, bw, promote_type(map(typeof, entries)...)}(entries) const DiagonalMatrixRow{T} = BandMatrixRow{0, 1, T} const BidiagonalMatrixRow{T} = BandMatrixRow{-1 + half, 2, T} @@ -74,7 +72,7 @@ function Base.promote_rule( row type $BMR1 and the $(ld2 isa PlusHalf ? 
"non-" : "")square matrix \ row type $BMR2 to a common type", ) - T = rpromote_type(eltype(BMR1), eltype(BMR2)) + T = promote_type(eltype(BMR1), eltype(BMR2)) return band_matrix_row_type(min(ld1, ld2), max(ud1, ud2), T) end @@ -84,6 +82,7 @@ Base.promote_rule( ) where {BMR <: BandMatrixRow, US <: UniformScaling} = promote_rule(BMR, DiagonalMatrixRow{eltype(US)}) +Base.convert(::Type{BMR}, row::BMR) where {BMR <: BandMatrixRow} = row function Base.convert( ::Type{BMR}, row::BandMatrixRow, @@ -95,12 +94,13 @@ function Base.convert( row of type $(typeof(row)) to the \ $(new_ld isa PlusHalf ? "non-" : "")square matrix row type $BMR", ) + new_ld == old_ld && new_ud == old_ud && return BMR(row.entries) new_ld <= old_ld && new_ud >= old_ud || error( "Cannot convert a $(typeof(row)) to a $BMR, since that would require \ dropping potentially non-zero row entries", ) - first_zeros = ntuple(_ -> rzero(eltype(BMR)), Val(old_ld - new_ld)) - last_zeros = ntuple(_ -> rzero(eltype(BMR)), Val(new_ud - old_ud)) + first_zeros = ntuple(Returns(zero(eltype(BMR))), Val(old_ld - new_ld)) + last_zeros = ntuple(Returns(zero(eltype(BMR))), Val(new_ud - old_ud)) return BMR((first_zeros..., row.entries..., last_zeros...)) end @@ -116,44 +116,49 @@ Base.:(==)(row1::BandMatrixRow, row2::UniformScaling) = Base.:(==)(row1::UniformScaling, row2::BandMatrixRow) = ==(promote(row1, row2)...) -Base.:+(row::BandMatrixRow) = map(radd, row) +Base.:+(row::BandMatrixRow) = map(+, row) Base.:+(row1::BandMatrixRow, row2::BandMatrixRow) = - map(radd, promote(row1, row2)...) + map(+, promote(row1, row2)...) Base.:+(row1::BandMatrixRow, row2::UniformScaling) = - map(radd, promote(row1, row2)...) + map(+, promote(row1, row2)...) Base.:+(row1::UniformScaling, row2::BandMatrixRow) = - map(radd, promote(row1, row2)...) + map(+, promote(row1, row2)...) 
-Base.:-(row::BandMatrixRow) = map(rsub, row) +Base.:-(row::BandMatrixRow) = map(-, row) Base.:-(row1::BandMatrixRow, row2::BandMatrixRow) = - map(rsub, promote(row1, row2)...) + map(-, promote(row1, row2)...) Base.:-(row1::BandMatrixRow, row2::UniformScaling) = - map(rsub, promote(row1, row2)...) + map(-, promote(row1, row2)...) Base.:-(row1::UniformScaling, row2::BandMatrixRow) = - map(rsub, promote(row1, row2)...) + map(-, promote(row1, row2)...) -Base.:*(row::BandMatrixRow, value::Geometry.SingleValue) = - map(entry -> rmul(entry, value), row) -Base.:*(value::Geometry.SingleValue, row::BandMatrixRow) = - map(entry -> rmul(value, entry), row) +Base.:*(row::BandMatrixRow, value::Union{Geometry.SingleValue, AutoBroadcaster}) = + map(Base.Fix2(*, value), row) +Base.:*(value::Union{Geometry.SingleValue, AutoBroadcaster}, row::BandMatrixRow) = + map(Base.Fix1(*, value), row) -Base.:/(row::BandMatrixRow, value::Number) = - map(entry -> rdiv(entry, value), row) +Base.:/(row::BandMatrixRow, value::Union{Geometry.SingleValue, AutoBroadcaster}) = + map(Base.Fix2(/, value), row) Base.zero(row::BandMatrixRow) = zero(typeof(row)) Base.zero(::Type{BandMatrixRow{ld, bw, T}}) where {ld, bw, T} = - BandMatrixRow{ld}(ntuple(_ -> rzero(T), Val(bw))...) + BandMatrixRow{ld}(ntuple(Returns(zero(T)), Val(bw))...) Base.one(row::BandMatrixRow) = one(typeof(row)) -Base.one(::Type{DiagonalMatrixRow{T}}) where {T} = - DiagonalMatrixRow(rmap(one, T)) +Base.one(::Type{DiagonalMatrixRow{T}}) where {T} = DiagonalMatrixRow(one(T)) Base.one(::Type{BandMatrixRow{ld, bw, T}}) where {ld, bw, T} = ld isa PlusHalf ? 
error("A non-square matrix does not have a corresponding identity matrix") : one(DiagonalMatrixRow{T}) -inv(row::DiagonalMatrixRow) = DiagonalMatrixRow(rmap(inv, row[0])) +inv(row::DiagonalMatrixRow) = DiagonalMatrixRow(inv(row[0])) inv(::BandMatrixRow{ld, bw}) where {ld, bw} = error( "The inverse of a matrix with $bw diagonals is generally a dense matrix, \ so it cannot be represented using BandMatrixRows", ) + +# Allow row entries to be wrapped in AutoBroadcasters, but not the row itself. +is_auto_broadcastable(::Type{BMR}) where {BMR <: BandMatrixRow} = + is_auto_broadcastable(eltype(BMR)) +add_auto_broadcasters(row::BandMatrixRow) = map(add_auto_broadcasters, row) +drop_auto_broadcasters(row::BandMatrixRow) = map(drop_auto_broadcasters, row) diff --git a/src/MatrixFields/field_name_dict.jl b/src/MatrixFields/field_name_dict.jl index cfb24a38f3..589f16e37b 100644 --- a/src/MatrixFields/field_name_dict.jl +++ b/src/MatrixFields/field_name_dict.jl @@ -75,9 +75,7 @@ is_field_broadcasted(bc) = check_entry(::Type{FieldName}, entry::Base.AbstractBroadcasted) = is_field_broadcasted(entry) check_entry(::Type{FieldNamePair}, entry::Base.AbstractBroadcasted) = - is_field_broadcasted(entry) # && eltype(entry) <: BandMatrixRow -# TODO: Adding the eltype check introduces JET failures to several FieldMatrix -# test cases in CI. We may to implement our own version of eltype to avoid this. 
+ is_field_broadcasted(entry) && eltype(entry) <: BandMatrixRow is_diagonal_matrix_entry(::ScalingFieldMatrixEntry) = true is_diagonal_matrix_entry(entry) = eltype(entry) <: DiagonalMatrixRow @@ -276,7 +274,10 @@ function get_internal_entry( else # fallback to broadcasted indexing on each element, currently no support for view_of_blocks return Base.broadcasted(entry) do matrix_row map(matrix_row) do matrix_row_entry - get_internal_entry(matrix_row_entry, name_pair) + get_internal_entry( + drop_auto_broadcasters(matrix_row_entry), + name_pair, + ) end end end @@ -546,7 +547,7 @@ e³ = Geometry.Covariant3Vector(1) ᶜᶠmat2 = fill(BidiagonalMatrixRow(4.3, 1.7), center_space) ᶜᶜmat3_uₕ_scalar = ᶜᶜmat3 .* (e¹²,) ρχ_unit = (;ρq_liq = 1.0, ρq_ice = 1.0) -ᶜᶠmat2_ρχ_u₃ = map(Base.Fix1(map, Base.Fix2(⊠, ρχ_unit ⊠ e₃')), ᶜᶠmat2) +ᶜᶠmat2_ρχ_u₃ = map(Base.Fix1(map, Base.Fix2(*, ρχ_unit * e₃')), ᶜᶠmat2) A = MatrixFields.FieldMatrix( (@name(c.ρχ), @name(f.u₃)) => ᶜᶠmat2_ρχ_u₃, @@ -714,6 +715,7 @@ const SingleValueStyle = const SingleValueStyleType = Union{ Number, + Ref{Geometry.SingleValue}, Tuple{Geometry.SingleValue}, Base.Broadcast.Broadcasted{<:SingleValueStyle}, } diff --git a/src/MatrixFields/field_name_set.jl b/src/MatrixFields/field_name_set.jl index 677d2e09b1..f2ec6b62a3 100644 --- a/src/MatrixFields/field_name_set.jl +++ b/src/MatrixFields/field_name_set.jl @@ -188,7 +188,7 @@ end #= There are four cases that we need to support in order to be compatible with -RecursiveApply (i.e., with rmul): +generic data types: 1. (_, name) * name or (_, name) * (name, _) 2. (_, name_child) * name -> (_, name_child) * name_child or diff --git a/src/MatrixFields/lazy_operators.jl b/src/MatrixFields/lazy_operators.jl index 050eb39e3a..cef0c89376 100644 --- a/src/MatrixFields/lazy_operators.jl +++ b/src/MatrixFields/lazy_operators.jl @@ -59,15 +59,9 @@ replace_lazy_operators(space, bc::LazyOperatorBroadcasted) = bc.f isa AbstractLazyOperator ? 
replace_lazy_operator(space, bc.f) : Base.Broadcast.broadcasted( bc.f, - replace_lazy_operators_args(space, bc.args...)..., + unrolled_map(Base.Fix1(replace_lazy_operators, space), bc.args)..., ) -replace_lazy_operators_args(_) = () -replace_lazy_operators_args(space, arg, args...) = ( - replace_lazy_operators(space, arg), - replace_lazy_operators_args(space, args...)..., -) - """ replace_lazy_operator(space, lazy_op) @@ -83,11 +77,8 @@ replace_lazy_operator(_, ::AbstractLazyOperator) = largest_space(_) = nothing largest_space(field::Fields.Field) = axes(field) -largest_space(bc::Base.AbstractBroadcasted) = largest_space_args(bc.args...) - -largest_space_args() = nothing -largest_space_args(arg, args...) = - larger_space(largest_space(arg), largest_space_args(args...)) +largest_space(bc::Base.AbstractBroadcasted) = + unrolled_reduce(larger_space, unrolled_map(largest_space, bc.args); init = nothing) larger_space(::Nothing, ::Nothing) = nothing larger_space(space1, ::Nothing) = space1 diff --git a/src/MatrixFields/matrix_multiplication.jl b/src/MatrixFields/matrix_multiplication.jl index a82b70e01e..61045668b7 100644 --- a/src/MatrixFields/matrix_multiplication.jl +++ b/src/MatrixFields/matrix_multiplication.jl @@ -207,15 +207,6 @@ Operators.strip_space(op::MultiplyColumnwiseBandMatrixField, _) = op struct TopLeftMatrixCorner <: Operators.AbstractBoundaryCondition end struct BottomRightMatrixCorner <: Operators.AbstractBoundaryCondition end -Operators.has_boundary( - ::MultiplyColumnwiseBandMatrixField, - ::Operators.LeftBoundaryWindow{name}, -) where {name} = true -Operators.has_boundary( - ::MultiplyColumnwiseBandMatrixField, - ::Operators.RightBoundaryWindow{name}, -) where {name} = true - Operators.get_boundary( ::MultiplyColumnwiseBandMatrixField, ::Operators.LeftBoundaryWindow{name}, @@ -300,42 +291,11 @@ function Operators.return_eltype( ld1, ud1 = outer_diagonals(et_mat1) ld2, ud2 = outer_diagonals(et_arg) prod_ld, prod_ud = ld1 + ld2, ud1 + ud2 - 
prod_value_type = rmul_return_type(eltype(et_mat1), eltype(et_arg)) - return band_matrix_row_type(prod_ld, prod_ud, prod_value_type) - else # matrix-vector multiplication - vector = arg - return rmul_return_type(eltype(et_mat1), et_arg) - end -end - -function Operators.return_eltype( - ::MultiplyColumnwiseBandMatrixField, - matrix1, - arg, - ::Type{LG}, -) where {LG} - et_mat1 = eltype(matrix1) - et_arg = eltype(arg) - et_mat1 <: BandMatrixRow || error( - "The first argument of MultiplyColumnwiseBandMatrixField must have - elements of type BandMatrixRow, but the given argument has $et_mat1", - ) - if et_arg <: BandMatrixRow # matrix-matrix multiplication - matrix2 = arg - ld1, ud1 = outer_diagonals(et_mat1) - ld2, ud2 = outer_diagonals(et_arg) - prod_ld, prod_ud = ld1 + ld2, ud1 + ud2 - prod_value_type = Base.promote_op( - rmul_with_projection, - eltype(et_mat1), - eltype(et_arg), - LG, - ) + prod_value_type = mul_return_type(eltype(et_mat1), eltype(et_arg)) return band_matrix_row_type(prod_ld, prod_ud, prod_value_type) else # matrix-vector multiplication vector = arg - prod_value_type = - Base.promote_op(rmul_with_projection, eltype(et_mat1), et_arg, LG) + return mul_return_type(eltype(et_mat1), et_arg) end end @@ -369,13 +329,11 @@ function multiply_matrix_at_index( bc, ::Type{T}, ) where {T <: BandMatrixRow} - # T = eltype(arg) lg = Geometry.LocalGeometry(space, idx, hidx) prod_type = Operators.return_eltype( MultiplyColumnwiseBandMatrixField(), matrix1, arg, - typeof(lg), ) column_space1 = column_axes(matrix1, space) @@ -415,7 +373,7 @@ function multiply_matrix_at_index( # Precompute the zero value to avoid inference issues caused by passing # prod_type into the function closure below. - zero_value = rzero(eltype(prod_type)) + zero_value = zero(eltype(prod_type)) # Compute the entries of the product matrix row. 
To avoid inference # issues at boundary points, this is implemented as a padded map from @@ -435,10 +393,7 @@ function multiply_matrix_at_index( value1 = matrix1_row[d] value2 = matrix2_rows_wrapper[d][prod_d - d] value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) - prod_entry = radd( - prod_entry, - rmul_with_projection(value1, value2, value2_lg), - ) + prod_entry += mul_with_projection(value1, value2, value2_lg) end # Using a for-loop is currently faster than using mapreduce. prod_entry else @@ -457,13 +412,11 @@ function multiply_matrix_at_index( bc, ::Type{T}, ) where {T} - # T = eltype(arg) lg = Geometry.LocalGeometry(space, idx, hidx) prod_type = Operators.return_eltype( MultiplyColumnwiseBandMatrixField(), matrix1, arg, - typeof(lg), ) column_space1 = column_axes(matrix1, space) @@ -476,13 +429,12 @@ function multiply_matrix_at_index( matrix1_row = @inbounds Operators.getidx(space, matrix1, idx, hidx) vector = arg - prod_value = rzero(prod_type) + prod_value = zero(prod_type) @inbounds for d in boundary_modified_ld1:boundary_modified_ud1 value1 = matrix1_row[d] value2 = Operators.getidx(space, vector, idx + d, hidx) value2_lg = Geometry.LocalGeometry(space, idx + d, hidx) - prod_value = - radd(prod_value, rmul_with_projection(value1, value2, value2_lg)) + prod_value += mul_with_projection(value1, value2, value2_lg) end # Using a for-loop is currently faster than using mapreduce. return prod_value end diff --git a/src/MatrixFields/multiple_field_solver.jl b/src/MatrixFields/multiple_field_solver.jl index 926de822f1..c065d83cde 100644 --- a/src/MatrixFields/multiple_field_solver.jl +++ b/src/MatrixFields/multiple_field_solver.jl @@ -1,23 +1,12 @@ -# TODO: Can different A's be different matrix styles? -# if so, how can we handle fuse/parallelize? 
- -# First, dispatch based on the first x and the device: function multiple_field_solve!(cache, x, A, b) - name1 = first(matrix_row_keys(keys(A))) - x1 = x[name1] - multiple_field_solve!(ClimaComms.device(axes(x1)), cache, x, A, b, x1) + x1 = first(values(x)) + x_bc = FieldNameDict(keys(x), unrolled_map(Base.broadcastable, values(x))) + b_bc = FieldNameDict(keys(b), unrolled_map(Base.broadcastable, values(b))) + multiple_field_solve!(ClimaComms.device(axes(x1)), cache, x_bc, A, b_bc) end # TODO: fuse/parallelize -function multiple_field_solve!( - ::ClimaComms.AbstractCPUDevice, - cache, - x, - A, - b, - x1, -) +multiple_field_solve!(::ClimaComms.AbstractCPUDevice, cache, x, A, b) = foreach(matrix_row_keys(keys(A))) do name single_field_solve!(cache[name], x[name], A[name, name], b[name]) end -end diff --git a/src/MatrixFields/operator_matrices.jl b/src/MatrixFields/operator_matrices.jl index 59ae6dbeba..5a332f4aae 100644 --- a/src/MatrixFields/operator_matrices.jl +++ b/src/MatrixFields/operator_matrices.jl @@ -225,15 +225,6 @@ operator_matrix(::O) where {O <: Operators.AbstractOperator} = ################################################################################ -Operators.has_boundary( - op_matrix::FDOperatorMatrix, - lbw::Operators.LeftBoundaryWindow{name}, -) where {name} = Operators.has_boundary(op_matrix.op, lbw) -Operators.has_boundary( - op_matrix::FDOperatorMatrix, - rbw::Operators.RightBoundaryWindow{name}, -) where {name} = Operators.has_boundary(op_matrix.op, rbw) - Operators.get_boundary( op_matrix::FDOperatorMatrix, lbw::Operators.LeftBoundaryWindow{name}, @@ -321,6 +312,25 @@ op_matrix_first_row(op, bc, space, idx, hidx, args...) = op_matrix_last_row(op, bc, space, idx, hidx, args...) 
= op_matrix_last_row(op, bc, Spaces.undertype(space)) +# Fallback methods for unspecified boundary conditions (need to use zero here +# instead of NaN to avoid polluting nearby interior rows with NaNs) +Operators.stencil_left_boundary( + op_matrix::FDOperatorMatrix, + ::Operators.NullBoundaryCondition, + space, + _, + _, + args..., +) = zero(Operators.return_eltype(op_matrix, args...)) +Operators.stencil_right_boundary( + op_matrix::FDOperatorMatrix, + ::Operators.NullBoundaryCondition, + space, + _, + _, + args..., +) = zero(Operators.return_eltype(op_matrix, args...)) + ################################################################################ # Additional aliases for CenterToFace or FaceToCenter matrix rows @@ -438,8 +448,8 @@ Base.@propagate_inbounds function op_matrix_interior_row( ) w⁻ = Operators.getidx(space, weight, idx - half, hidx) w⁺ = Operators.getidx(space, weight, idx + half, hidx) - denominator = radd(w⁻, w⁺) - return BidiagonalMatrixRow(rdiv(w⁻, denominator), rdiv(w⁺, denominator)) + denominator = w⁻ + w⁺ + return BidiagonalMatrixRow(w⁻ / denominator, w⁺ / denominator) end op_matrix_first_row( ::Operators.WeightedInterpolateC2F, diff --git a/src/MatrixFields/single_field_solver.jl b/src/MatrixFields/single_field_solver.jl index 74fb45053d..0ed4d92d1d 100644 --- a/src/MatrixFields/single_field_solver.jl +++ b/src/MatrixFields/single_field_solver.jl @@ -12,19 +12,12 @@ inv_return_type(::Type{X}) where {T, X <: Geometry.Axis2TensorOrAdj{T}} = Tuple{dual_type(Geometry.axis2(X)), dual_type(Geometry.axis1(X))}, ) -x_eltype(A::ScalingFieldMatrixEntry, b) = x_eltype(eltype(A), eltype(b)) +x_eltype(A::ScalingFieldMatrixEntry, b) = + x_type(eltype(A), eltype(Base.broadcastable(b))) x_eltype(A::ColumnwiseBandMatrixField, b) = - x_eltype(eltype(eltype(A)), eltype(b)) -x_eltype(::Type{T_A}, ::Type{T_b}) where {T_A, T_b} = - rmul_return_type(inv_return_type(T_A), T_b) -# Base.promote_op(rmul_with_projection, inv_return_type(T_A), T_b, LG) - 
-unit_eltype(A::ScalingFieldMatrixEntry) = eltype(A) -unit_eltype(A::ColumnwiseBandMatrixField) = - unit_eltype(eltype(eltype(A)), local_geometry_type(A)) -unit_eltype(::Type{T_A}, ::Type{LG}) where {T_A, LG} = - rmul_return_type(inv_return_type(T_A), T_A) -# Base.promote_op(rmul_with_projection, inv_return_type(T_A), T_A, LG) + x_type(eltype(eltype(A)), eltype(Base.broadcastable(b))) +x_type(::Type{T_A}, ::Type{T_b}) where {T_A, T_b} = + mul_return_type(inv_return_type(T_A), T_b) ################################################################################ @@ -43,32 +36,22 @@ end single_field_solver_cache(::ScalingFieldMatrixEntry, b) = similar(b, Tuple{}) function single_field_solver_cache(A::ColumnwiseBandMatrixField, b) ud = outer_diagonals(eltype(A))[2] - cache_eltype = - ud == 0 ? Tuple{} : - Tuple{x_eltype(A, b), ntuple(_ -> unit_eltype(A), Val(ud))...} - return similar(b, cache_eltype) + ud == 0 && return similar(b, Tuple{}) + T_U = mul_return_type(inv_return_type(eltype(eltype(A))), eltype(eltype(A))) + return similar(b, Tuple{x_eltype(A, b), ntuple(Returns(T_U), Val(ud))...}) end -function single_field_solve_diag_matrix_row!( - cache, - x, - A::ColumnwiseBandMatrixField, - b, -) - # Use fields here, and not field values, so that this operation is - # mask-aware. - A₀ = A.entries.:1 - @. x = inv(A₀) ⊠ b -end single_field_solve!(_, x, A::ScalingFieldMatrixEntry, b) = x .= (inv(scaling_value(A)),) .* b -function single_field_solve!(cache, x, A::ColumnwiseBandMatrixField, b) +single_field_solve!(cache, x, A::ColumnwiseBandMatrixField, b) = if eltype(A) <: MatrixFields.DiagonalMatrixRow - single_field_solve_diag_matrix_row!(cache, x, A, b) + A₀ = A.entries.:1 + @. 
x = inv(A₀) * b else - single_field_solve!(ClimaComms.device(axes(A)), cache, x, A, b) + x_bc = Base.broadcastable(x) + b_bc = Base.broadcastable(b) + single_field_solve!(ClimaComms.device(axes(A)), cache, x_bc, A, b_bc) end -end single_field_solve!(::ClimaComms.AbstractCPUDevice, cache, x, A, b) = _single_field_solve!(ClimaComms.device(axes(A)), cache, x, A, b) @@ -85,13 +68,12 @@ function _single_field_solve!( mask = Spaces.get_mask(space) if space isa Spaces.FiniteDifferenceSpace @assert mask isa DataLayouts.NoMask - _single_field_solve_col!(device, cache, x, A, b) + single_field_solve_col!(cache, x, A, b) else Fields.bycolumn(space) do colidx I = Fields.universal_index(colidx) if DataLayouts.should_compute(mask, I) - _single_field_solve_col!( - device, + single_field_solve_col!( cache[colidx], x[colidx], A[colidx], @@ -102,28 +84,15 @@ function _single_field_solve!( end end -function _single_field_solve_col!( - ::ClimaComms.AbstractCPUDevice, - cache, - x, - A, - b, -) - if A isa Fields.ColumnField - band_matrix_solve!( - eltype(A), - unzip_tuple_field_values(Fields.field_values(cache)), - Fields.field_values(x), - unzip_tuple_field_values(Fields.field_values(A.entries)), - Fields.field_values(b), - vindex, - ) - elseif A isa ScalingFieldMatrixEntry - x .= (inv(scaling_value(A)),) .* b - else - error("uncaught case") - end -end +single_field_solve_col!(cache, x, A, b) = + band_matrix_solve!( + eltype(A), + unzip_tuple_field_values(Fields.field_values(cache)), + Fields.field_values(x), + unzip_tuple_field_values(Fields.field_values(A.entries)), + Fields.field_values(b), + vindex, + ) unzip_tuple_field_values(data) = ntuple(i -> data.:($i), Val(length(propertynames(data)))) @@ -132,7 +101,7 @@ function band_matrix_solve!(::Type{<:DiagonalMatrixRow}, _, x, Aⱼs, b, vi) (A₀,) = Aⱼs n = length(x) @inbounds for i in 1:n - x[vi(i)] = inv(A₀[vi(i)]) ⊠ b[vi(i)] + x[vi(i)] = inv(A₀[vi(i)]) * b[vi(i)] end end @@ -163,18 +132,18 @@ function band_matrix_solve!( n = length(x) 
@inbounds begin inv_D₀ = inv(A₀[vi(1)]) - U₊₁ᵢ₋₁ = inv_D₀ ⊠ A₊₁[vi(1)] - Uxᵢ₋₁ = inv_D₀ ⊠ b[vi(1)] + U₊₁ᵢ₋₁ = inv_D₀ * A₊₁[vi(1)] + Uxᵢ₋₁ = inv_D₀ * b[vi(1)] Ux[vi(1)] = Uxᵢ₋₁ U₊₁[vi(1)] = U₊₁ᵢ₋₁ for i in 2:n A₋₁ᵢ = A₋₁[vi(i)] - inv_D₀ = inv(A₀[vi(i)] ⊟ A₋₁ᵢ ⊠ U₊₁ᵢ₋₁) - Uxᵢ₋₁ = inv_D₀ ⊠ (b[vi(i)] ⊟ A₋₁ᵢ ⊠ Uxᵢ₋₁) + inv_D₀ = inv(A₀[vi(i)] - A₋₁ᵢ * U₊₁ᵢ₋₁) + Uxᵢ₋₁ = inv_D₀ * (b[vi(i)] - A₋₁ᵢ * Uxᵢ₋₁) Ux[vi(i)] = Uxᵢ₋₁ if i < n - U₊₁ᵢ₋₁ = inv_D₀ ⊠ A₊₁[vi(i)] # U₊₁[n] is outside the matrix. + U₊₁ᵢ₋₁ = inv_D₀ * A₊₁[vi(i)] # U₊₁[n] is outside the matrix. U₊₁[vi(i)] = U₊₁ᵢ₋₁ end end @@ -184,7 +153,7 @@ function band_matrix_solve!( i = (n - 1) # for i in (n - 1):-1:1 while i ≥ 1 - x[vi(i)] = Ux[vi(i)] ⊟ U₊₁[vi(i)] ⊠ x[vi(i + 1)] + x[vi(i)] = Ux[vi(i)] - U₊₁[vi(i)] * x[vi(i + 1)] i -= 1 end end @@ -222,36 +191,36 @@ function band_matrix_solve!( n = length(x) @inbounds begin inv_D₀ = inv(A₀[vi(1)]) - Ux[vi(1)] = inv_D₀ ⊠ b[vi(1)] - U₊₁[vi(1)] = inv_D₀ ⊠ A₊₁[vi(1)] - U₊₂[vi(1)] = inv_D₀ ⊠ A₊₂[vi(1)] + Ux[vi(1)] = inv_D₀ * b[vi(1)] + U₊₁[vi(1)] = inv_D₀ * A₊₁[vi(1)] + U₊₂[vi(1)] = inv_D₀ * A₊₂[vi(1)] - inv_D₀ = inv(A₀[vi(2)] ⊟ A₋₁[vi(2)] ⊠ U₊₁[vi(1)]) - Ux[vi(2)] = inv_D₀ ⊠ (b[vi(2)] ⊟ A₋₁[vi(2)] ⊠ Ux[vi(1)]) - U₊₁[vi(2)] = inv_D₀ ⊠ (A₊₁[vi(2)] ⊟ A₋₁[vi(2)] ⊠ U₊₂[vi(1)]) - U₊₂[vi(2)] = inv_D₀ ⊠ A₊₂[vi(2)] + inv_D₀ = inv(A₀[vi(2)] - A₋₁[vi(2)] * U₊₁[vi(1)]) + Ux[vi(2)] = inv_D₀ * (b[vi(2)] - A₋₁[vi(2)] * Ux[vi(1)]) + U₊₁[vi(2)] = inv_D₀ * (A₊₁[vi(2)] - A₋₁[vi(2)] * U₊₂[vi(1)]) + U₊₂[vi(2)] = inv_D₀ * A₊₂[vi(2)] for i in 3:n - L₋₁ = A₋₁[vi(i)] ⊟ A₋₂[vi(i)] ⊠ U₊₁[vi(i - 2)] + L₋₁ = A₋₁[vi(i)] - A₋₂[vi(i)] * U₊₁[vi(i - 2)] inv_D₀ = inv( - A₀[vi(i)] ⊟ L₋₁ ⊠ U₊₁[vi(i - 1)] ⊟ A₋₂[vi(i)] ⊠ U₊₂[vi(i - 2)], + A₀[vi(i)] - L₋₁ * U₊₁[vi(i - 1)] - A₋₂[vi(i)] * U₊₂[vi(i - 2)], ) Ux[vi(i)] = - inv_D₀ ⊠ - (b[vi(i)] ⊟ L₋₁ ⊠ Ux[vi(i - 1)] ⊟ A₋₂[vi(i)] ⊠ Ux[vi(i - 2)]) - i < n && (U₊₁[vi(i)] = inv_D₀ ⊠ (A₊₁[vi(i)] ⊟ L₋₁ ⊠ U₊₂[vi(i - 1)])) - i < n - 1 && (U₊₂[vi(i)] = inv_D₀ ⊠ A₊₂[vi(i)]) + inv_D₀ 
* + (b[vi(i)] - L₋₁ * Ux[vi(i - 1)] - A₋₂[vi(i)] * Ux[vi(i - 2)]) + i < n && (U₊₁[vi(i)] = inv_D₀ * (A₊₁[vi(i)] - L₋₁ * U₊₂[vi(i - 1)])) + i < n - 1 && (U₊₂[vi(i)] = inv_D₀ * A₊₂[vi(i)]) end x[vi(n)] = Ux[vi(n)] - x[vi(n - 1)] = Ux[vi(n - 1)] ⊟ U₊₁[vi(n - 1)] ⊠ x[vi(n)] + x[vi(n - 1)] = Ux[vi(n - 1)] - U₊₁[vi(n - 1)] * x[vi(n)] # Avoid steprange on GPU: https://cuda.juliagpu.org/stable/tutorials/performance/#Avoiding-StepRange # for i in (n - 2):-1:1 i = (n - 2) while i ≥ 1 x[vi(i)] = - Ux[vi(i)] ⊟ U₊₁[vi(i)] ⊠ x[vi(i + 1)] ⊟ - U₊₂[vi(i)] ⊠ x[vi(i + 2)] + Ux[vi(i)] - U₊₁[vi(i)] * x[vi(i + 1)] - + U₊₂[vi(i)] * x[vi(i + 2)] i -= 1 end end @@ -266,8 +235,6 @@ eltype(x), eltype(A), and eltype(b): - SVector{N}, SMatrix{N, N}, and SVector{N} - AxisVector with axis A1, Axis2TensorOrAdj with axes (A2, dual(A1)), and AxisVector with axis A2 -- nested type (Tuple or NamedTuple), scalar type (Number, SMatrix, or - Axis2TensorOrAdj), nested type (Tuple or NamedTuple) We might eventually want a single general method for band_matrix_solve!, similar to the BLAS.gbsv function. For now, though, the methods above should be enough. 
diff --git a/src/Operators/Operators.jl b/src/Operators/Operators.jl index 375b6b5c73..57ccaed22f 100644 --- a/src/Operators/Operators.jl +++ b/src/Operators/Operators.jl @@ -8,6 +8,8 @@ import Base.Broadcast: Broadcasted import ..slab, ..slab_args, ..column, ..column_args import ClimaComms +import ..Utilities: + new, is_auto_broadcastable, add_auto_broadcasters, drop_auto_broadcasters import ..DebugOnly: call_post_op_callback, post_op_callback import ..DataLayouts: DataLayouts, Data2D, DataSlab2D import ..DataLayouts: vindex @@ -18,8 +20,6 @@ import ..Meshes import ..Grids import ..Fields: Fields, Field -using ..RecursiveApply - include("common.jl") include("spectralelement.jl") include("numericalflux.jl") diff --git a/src/Operators/common.jl b/src/Operators/common.jl index e3262dbe04..98f76b94f8 100644 --- a/src/Operators/common.jl +++ b/src/Operators/common.jl @@ -24,7 +24,6 @@ Base.Broadcast.BroadcastStyle( ::Type{<:OperatorBroadcasted{Style}}, ) where {Style} = Style() - # recursively unwrap axes broadcast arguments in a way that is statically reducible by the optimizer @inline axes_args(args::Tuple) = unrolled_map(axes, args) @@ -38,17 +37,14 @@ function Base.axes(opbc::OperatorBroadcasted) opbc.axes end end -function Base.similar(opbc::OperatorBroadcasted, ::Type{Eltype}) where {Eltype} - space = axes(opbc) - return Field(Eltype, space) -end -function Base.copy(opbc::OperatorBroadcasted) - # figure out return type - dest = similar(opbc, eltype(opbc)) - # allocate dest - copyto!(dest, opbc) -end Base.Broadcast.broadcastable(opbc::OperatorBroadcasted) = opbc +Base.copy(opbc::OperatorBroadcasted) = copyto!(similar(opbc), opbc) +Base.similar(opbc::OperatorBroadcasted, ::Type{Eltype}) where {Eltype} = + Field(Eltype, axes(opbc)) + +# Define similar to match DataStyle and AbstractFieldStyle broadcasting +Base.similar(opbc::OperatorBroadcasted) = + similar(opbc, drop_auto_broadcasters(eltype(opbc))) function Base.Broadcast.materialize(opbc::OperatorBroadcasted) 
copy(Base.Broadcast.instantiate(opbc)) diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl index 3b5a846be3..7b1a321c31 100644 --- a/src/Operators/finitedifference.jl +++ b/src/Operators/finitedifference.jl @@ -167,8 +167,6 @@ struct RightBoundaryWindow{name} <: BoundaryWindow end An abstract type for finite difference operators. Instances of this should define: -- [`getidx_return_type`](@ref) -- [`stencil_return_type`](@ref) - [`return_eltype`](@ref) - [`return_space`](@ref) - [`stencil_interior_width`](@ref) @@ -180,18 +178,6 @@ abstract type FiniteDifferenceOperator <: AbstractOperator end return_eltype(::FiniteDifferenceOperator, arg) = eltype(arg) -""" - getidx_return_type(::Base.Broadcasted) - getidx_return_type(::StencilBroadcasted) - getidx_return_type(::Field) - getidx_return_type(::Any) - ... - -The return type of `getidx` on the arguemnt. -Defaults to the type of the argument. -""" -function getidx_return_type end - # boundary width error fallback @noinline invalid_boundary_condition_error(op_type::Type, bc_type::Type) = error("Boundary `$bc_type` is not supported for operator `$op_type`") @@ -223,18 +209,6 @@ get_boundary( ::RightBoundaryWindow{name}, ) where {name} = get_boundary(op.bcs, name) -has_boundary( - op::FiniteDifferenceOperator, - ::LeftBoundaryWindow{name}, -) where {name} = hasfield(typeof(op.bcs), name) - -has_boundary( - op::FiniteDifferenceOperator, - ::RightBoundaryWindow{name}, -) where {name} = hasfield(typeof(op.bcs), name) - -has_boundary(op::FiniteDifferenceOperator, ::Interior) = false - strip_space(op::FiniteDifferenceOperator, parent_space) = unionall_type(typeof(op))( NamedTuple{keys(op.bcs)}( @@ -352,14 +326,6 @@ Defines the stencil of the operator `Op` in the interior of the domain at `idx`; """ function stencil_interior end -""" - stencil_return_type(::Op, args...) - -The return type of the given stencil and arguments. 
-""" -function stencil_return_type end - - """ boundary_width(::Op, ::BC, args...) @@ -371,30 +337,29 @@ defined for a specific `Op`/`BC` combination. function boundary_width end """ - stencil_left_boundary(::Op, ::BC, idx, args...) + stencil_left_boundary(op, bc, idx, hidx, args...) -Defines the stencil of operator `Op` at `idx` near the left boundary, with boundary condition `BC`. +The result of stencil operator `op` at horizontal index `hidx` and some vertical +index `idx` near the left boundary, with boundary condition `bc`. For operators +that cannot be evaluated without a boundary condition, using the +`NullBoundaryCondition` will always generate `NaN` values. """ -function stencil_left_boundary end +stencil_left_boundary(op, ::NullBoundaryCondition, space, _, _, args...) = + new(return_eltype(op, args...)) * Spaces.undertype(space)(NaN) """ - stencil_right_boundary(::Op, ::BC, idx, args...) + stencil_right_boundary(op, bc, idx, hidx, args...) -Defines the stencil of operator `Op` at `idx` near the right boundary, with boundary condition `BC`. +The result of stencil operator `op` at horizontal index `hidx` and some vertical +index `idx` near the right boundary, with boundary condition `bc`. For operators +that cannot be evaluated without a boundary condition, using the +`NullBoundaryCondition` will always generate `NaN` values. """ -function stencil_right_boundary end - +stencil_right_boundary(op, ::NullBoundaryCondition, space, _, _, args...) = + new(return_eltype(op, args...)) * Spaces.undertype(space)(NaN) abstract type InterpolationOperator <: FiniteDifferenceOperator end -# single argument interpolation must be the return type of getidx on the -# argument, which should be cheaper / simpler than return_eltype(op, args...) -@inline stencil_return_type(::InterpolationOperator, arg) = - getidx_return_type(arg) - -@inline stencil_return_type(op::FiniteDifferenceOperator, args...) = - return_eltype(op, args...) 
- function assert_no_bcs(op, kwargs) length(kwargs) == 0 && return nothing error("InterpolateF2C does not accept boundary conditions.") @@ -435,7 +400,7 @@ Base.@propagate_inbounds function stencil_interior( ) a⁺ = getidx(space, arg, idx + half, hidx) a⁻ = getidx(space, arg, idx - half, hidx) - RecursiveApply.rdiv(a⁺ ⊞ a⁻, 2) + (a⁺ + a⁻) / 2 end boundary_width(::InterpolateF2C, ::AbstractBoundaryCondition) = 0 @@ -494,7 +459,7 @@ Base.@propagate_inbounds function stencil_interior( ) a⁺ = getidx(space, arg, idx + half, hidx) a⁻ = getidx(space, arg, idx - half, hidx) - RecursiveApply.rdiv(a⁺ ⊞ a⁻, 2) + (a⁺ + a⁻) / 2 end boundary_width(::InterpolateC2F, ::AbstractBoundaryCondition) = 1 @@ -535,7 +500,7 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, bc.val, nothing, hidx), Geometry.LocalGeometry(space, idx, hidx), ) - a⁺ ⊟ RecursiveApply.rdiv(v₃, 2) + a⁺ - v₃ / 2 end Base.@propagate_inbounds function stencil_right_boundary( ::InterpolateC2F, @@ -551,7 +516,7 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, bc.val, nothing, hidx), Geometry.LocalGeometry(space, idx, hidx), ) - a⁻ ⊞ RecursiveApply.rdiv(v₃, 2) + a⁻ + v₃ / 2 end Base.@propagate_inbounds function stencil_left_boundary( @@ -1071,7 +1036,7 @@ end abstract type WeightedInterpolationOperator <: InterpolationOperator end # TODO: this is not in general correct and the return type -# should be based on the component operator types (rdiv, rmul) but we don't have a good way +# should be based on the component operator types (/, *) but we don't have a good way # of creating ex. 
one(field_type) for complex fields for inference return_eltype(::WeightedInterpolationOperator, weights, arg) = eltype(arg) @@ -1121,7 +1086,7 @@ Base.@propagate_inbounds function stencil_interior( w⁻ = getidx(space, weight, idx - half, hidx) a⁺ = getidx(space, arg, idx + half, hidx) a⁻ = getidx(space, arg, idx - half, hidx) - RecursiveApply.rdiv((w⁺ ⊠ a⁺) ⊞ (w⁻ ⊠ a⁻), (w⁺ ⊞ w⁻)) + (w⁺ * a⁺ + w⁻ * a⁻) / (w⁺ + w⁻) end boundary_width(::WeightedInterpolateF2C, ::AbstractBoundaryCondition) = 0 @@ -1181,7 +1146,7 @@ Base.@propagate_inbounds function stencil_interior( w⁻ = getidx(space, weight, idx - half, hidx) a⁺ = getidx(space, arg, idx + half, hidx) a⁻ = getidx(space, arg, idx - half, hidx) - RecursiveApply.rdiv((w⁺ ⊠ a⁺) ⊞ (w⁻ ⊠ a⁻), (w⁺ ⊞ w⁻)) + (w⁺ * a⁺ + w⁻ * a⁻) / (w⁺ + w⁻) end boundary_width(::WeightedInterpolateC2F, ::AbstractBoundaryCondition) = 1 @@ -1225,7 +1190,7 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, bc.val, nothing, hidx), Geometry.LocalGeometry(space, idx, hidx), ) - a⁺ ⊟ RecursiveApply.rdiv(v₃, 2) + a⁺ - v₃ / 2 end Base.@propagate_inbounds function stencil_right_boundary( ::WeightedInterpolateC2F, @@ -1242,7 +1207,7 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, bc.val, nothing, hidx), Geometry.LocalGeometry(space, idx, hidx), ) - a⁻ ⊞ RecursiveApply.rdiv(v₃, 2) + a⁻ + v₃ / 2 end Base.@propagate_inbounds function stencil_left_boundary( @@ -1331,13 +1296,7 @@ return_space( arg_space::AllCenterFiniteDifferenceSpace, ) = velocity_space -function upwind_biased_product(v, a⁻, a⁺) - RecursiveApply.rdiv( - ((v ⊞ RecursiveApply.rmap(abs, v)) ⊠ a⁻) ⊞ - ((v ⊟ RecursiveApply.rmap(abs, v)) ⊠ a⁺), - 2, - ) -end +upwind_biased_product(v, a⁻, a⁺) = ((v + abs(v)) * a⁻ + (v - abs(v)) * a⁺) / 2 stencil_interior_width(::UpwindBiasedProductC2F, velocity, arg) = ((0, 0), (-half, half)) @@ -1504,11 +1463,11 @@ function compute_Δ𝛼_linvanleer(a⁻, a⁰, a⁺, v, dt, ::MonotoneHarmonic) if sign(a⁰ - a⁻) == sign(a⁺ - a⁰) 
&& Δ𝜙_avg != 0 return ((a⁰ - a⁻) * (a⁺ - a⁰)) / (Δ𝜙_avg) * (1 - c) else - return eltype(v)(0) + return zero(v) end end -posdiff(x, y) = ifelse(x - y ≥ 0, x - y, eltype(x)(0)) +posdiff(x, y) = ifelse(x - y ≥ 0, x - y, zero(x)) function compute_Δ𝛼_linvanleer(a⁻, a⁰, a⁺, v, dt, ::PositiveDefinite) Δ𝜙_avg = ((a⁰ - a⁻) + (a⁺ - a⁰)) / 2 @@ -1529,11 +1488,11 @@ function slope_limited_product(v, a⁻, a⁻⁻, a⁺, a⁺⁺, dt, constraint) if v >= 0 # Eqn (2,5a,5b,5c) Δ𝛼 = compute_Δ𝛼_linvanleer(a⁻⁻, a⁻, a⁺, v, dt, constraint) - return v ⊠ (a⁻ ⊞ RecursiveApply.rdiv(Δ𝛼, 2)) + return v * (a⁻ + Δ𝛼 / 2) else # Eqn (2,5a,5b,5c) Δ𝛼 = compute_Δ𝛼_linvanleer(a⁻, a⁺, a⁺⁺, v, dt, constraint) - return v ⊠ (a⁺ ⊟ RecursiveApply.rdiv(Δ𝛼, 2)) + return v * (a⁺ - Δ𝛼 / 2) end end @@ -1699,13 +1658,11 @@ return_space( arg_space::AllCenterFiniteDifferenceSpace, ) = velocity_space -function upwind_3rdorder_biased_product(v, a⁻, a⁻⁻, a⁺, a⁺⁺) - RecursiveApply.rdiv( - (v ⊠ (7 ⊠ (a⁺ + a⁻) ⊟ (a⁺⁺ + a⁻⁻))) ⊟ - (RecursiveApply.rmap(abs, v) ⊠ (3 ⊠ (a⁺ - a⁻) ⊟ (a⁺⁺ - a⁻⁻))), - 12, - ) -end +upwind_3rdorder_biased_product(v, a⁻, a⁻⁻, a⁺, a⁺⁺) = + ( + v * (7 * (a⁺ + a⁻) - (a⁺⁺ + a⁻⁻)) - + abs(v) * (3 * (a⁺ - a⁻) - (a⁺⁺ - a⁻⁻)) + ) / 12 stencil_interior_width(::Upwind3rdOrderBiasedProductC2F, velocity, arg) = ((0, 0), (-half - 1, half + 1)) @@ -1864,33 +1821,13 @@ return_space( arg_space::AllCenterFiniteDifferenceSpace, ) = velocity_space -function fct_boris_book(v, a⁻⁻, a⁻, a⁺, a⁺⁺) - if v != zero(eltype(v)) - sign(v) ⊠ (RecursiveApply.rmap( - max, - zero(eltype(v)), - RecursiveApply.rmap( - min, - RecursiveApply.rmap(abs, v), - RecursiveApply.rmap( - min, - sign(v) ⊠ (a⁺⁺ - a⁺), - sign(v) ⊠ (a⁻ - a⁻⁻), - ), - ), - )) - else - RecursiveApply.rmap( - max, - zero(eltype(v)), - RecursiveApply.rmap( - min, - v, - RecursiveApply.rmap(min, (a⁺⁺ - a⁺), (a⁻ - a⁻⁻)), - ), - ) - end -end +fct_boris_book(v, a⁻⁻, a⁻, a⁺, a⁺⁺) = + ifelse( + iszero(v), + max(v, min(v, a⁺⁺ - a⁺, a⁻ - a⁻⁻)), + sign(v) * + max(zero(v), min(abs(v), 
sign(v) * (a⁺⁺ - a⁺), sign(v) * (a⁻ - a⁻⁻))), + ) stencil_interior_width(::FCTBorisBook, velocity, arg) = ((0, 0), (-half - 1, half + 1)) @@ -2003,63 +1940,6 @@ return_space( Φᵗᵈ_space::AllCenterFiniteDifferenceSpace, ) = A_space -function fct_zalesak( - Aⱼ₋₁₂, - Aⱼ₊₁₂, - Aⱼ₊₃₂, - ϕⱼ₋₁, - ϕⱼ, - ϕⱼ₊₁, - ϕⱼ₊₂, - ϕⱼ₋₁ᵗᵈ, - ϕⱼᵗᵈ, - ϕⱼ₊₁ᵗᵈ, - ϕⱼ₊₂ᵗᵈ, -) - # 1/dt is in ϕⱼ₋₁, ϕⱼ, ϕⱼ₊₁, ϕⱼ₊₂, ϕⱼ₋₁ᵗᵈ, ϕⱼᵗᵈ, ϕⱼ₊₁ᵗᵈ, ϕⱼ₊₂ᵗᵈ - - stable_zero = zero(eltype(Aⱼ₊₁₂)) - stable_one = one(eltype(Aⱼ₊₁₂)) - - # 𝒮5.4.2 (1) Durran (5.32) Zalesak's cosmetic correction - # which is usually omitted but used in Durran's textbook - # implementation of the flux corrected transport method. - # (Textbook suggests mixed results in 3 reported scenarios) - if ( - Aⱼ₊₁₂ * (ϕⱼ₊₁ᵗᵈ - ϕⱼᵗᵈ) < stable_zero && ( - Aⱼ₊₁₂ * (ϕⱼ₊₂ᵗᵈ - ϕⱼ₊₁ᵗᵈ) < stable_zero || - Aⱼ₊₁₂ * (ϕⱼᵗᵈ - ϕⱼ₋₁ᵗᵈ) < stable_zero - ) - ) - Aⱼ₊₁₂ = stable_zero - end - - # 𝒮5.4.2 (2) - # If flow is nondivergent, ϕᵗᵈ are not needed in the formulae below - ϕⱼᵐᵃˣ = max(ϕⱼ₋₁, ϕⱼ, ϕⱼ₊₁, ϕⱼ₋₁ᵗᵈ, ϕⱼᵗᵈ, ϕⱼ₊₁ᵗᵈ) - ϕⱼᵐⁱⁿ = min(ϕⱼ₋₁, ϕⱼ, ϕⱼ₊₁, ϕⱼ₋₁ᵗᵈ, ϕⱼᵗᵈ, ϕⱼ₊₁ᵗᵈ) - Pⱼ⁺ = max(stable_zero, Aⱼ₋₁₂) - min(stable_zero, Aⱼ₊₁₂) - # Zalesak also requires, in equation (5.33) Δx/Δt, which for the - # reference element we may assume Δζ = 1 between interfaces - Qⱼ⁺ = (ϕⱼᵐᵃˣ - ϕⱼᵗᵈ) - Rⱼ⁺ = (Pⱼ⁺ > stable_zero ? min(stable_one, Qⱼ⁺ / Pⱼ⁺) : stable_zero) - Pⱼ⁻ = max(stable_zero, Aⱼ₊₁₂) - min(stable_zero, Aⱼ₋₁₂) - Qⱼ⁻ = (ϕⱼᵗᵈ - ϕⱼᵐⁱⁿ) - Rⱼ⁻ = (Pⱼ⁻ > stable_zero ? min(stable_one, Qⱼ⁻ / Pⱼ⁻) : stable_zero) - ϕⱼ₊₁ᵐᵃˣ = max(ϕⱼ, ϕⱼ₊₁, ϕⱼ₊₂, ϕⱼᵗᵈ, ϕⱼ₊₁ᵗᵈ, ϕⱼ₊₂ᵗᵈ) - ϕⱼ₊₁ᵐⁱⁿ = min(ϕⱼ, ϕⱼ₊₁, ϕⱼ₊₂, ϕⱼᵗᵈ, ϕⱼ₊₁ᵗᵈ, ϕⱼ₊₂ᵗᵈ) - Pⱼ₊₁⁺ = max(stable_zero, Aⱼ₊₁₂) - min(stable_zero, Aⱼ₊₃₂) - Qⱼ₊₁⁺ = (ϕⱼ₊₁ᵐᵃˣ - ϕⱼ₊₁ᵗᵈ) - Rⱼ₊₁⁺ = (Pⱼ₊₁⁺ > stable_zero ? min(stable_one, Qⱼ₊₁⁺ / Pⱼ₊₁⁺) : stable_zero) - Pⱼ₊₁⁻ = max(stable_zero, Aⱼ₊₃₂) - min(stable_zero, Aⱼ₊₁₂) - Qⱼ₊₁⁻ = (ϕⱼ₊₁ᵗᵈ - ϕⱼ₊₁ᵐⁱⁿ) - Rⱼ₊₁⁻ = (Pⱼ₊₁⁻ > stable_zero ? 
min(stable_one, Qⱼ₊₁⁻ / Pⱼ₊₁⁻) : stable_zero) - - Cⱼ₊₁₂ = (Aⱼ₊₁₂ ≥ stable_zero ? min(Rⱼ₊₁⁺, Rⱼ⁻) : min(Rⱼ⁺, Rⱼ₊₁⁻)) - - return Cⱼ₊₁₂ * Aⱼ₊₁₂ -end - stencil_interior_width(::FCTZalesak, A_space, Φ_space, Φᵗᵈ_space) = ((-1, 1), (-half - 1, half + 1), (-half - 1, half + 1)) @@ -2072,45 +1952,57 @@ Base.@propagate_inbounds function stencil_interior( Φ_field, Φᵗᵈ_field, ) - # cell center variables - ϕⱼ₋₁ = getidx(space, Φ_field, idx - half - 1, hidx) - ϕⱼ = getidx(space, Φ_field, idx - half, hidx) - ϕⱼ₊₁ = getidx(space, Φ_field, idx + half, hidx) - ϕⱼ₊₂ = getidx(space, Φ_field, idx + half + 1, hidx) - # cell center variables - ϕⱼ₋₁ᵗᵈ = getidx(space, Φᵗᵈ_field, idx - half - 1, hidx) - ϕⱼᵗᵈ = getidx(space, Φᵗᵈ_field, idx - half, hidx) - ϕⱼ₊₁ᵗᵈ = getidx(space, Φᵗᵈ_field, idx + half, hidx) - ϕⱼ₊₂ᵗᵈ = getidx(space, Φᵗᵈ_field, idx + half + 1, hidx) - # cell face variables - Aⱼ₊₁₂ = Geometry.contravariant3( - getidx(space, A_field, idx, hidx), - Geometry.LocalGeometry(space, idx, hidx), - ) - Aⱼ₋₁₂ = Geometry.contravariant3( - getidx(space, A_field, idx - 1, hidx), - Geometry.LocalGeometry(space, idx - 1, hidx), - ) - Aⱼ₊₃₂ = Geometry.contravariant3( - getidx(space, A_field, idx + 1, hidx), - Geometry.LocalGeometry(space, idx + 1, hidx), - ) + # 1/dt is in ϕ₋₃₂, ϕ₋₁₂, ϕ₊₁₂, ϕ₊₃₂, ϕ₋₃₂ᵗᵈ, ϕ₋₁₂ᵗᵈ, ϕ₊₁₂ᵗᵈ, ϕ₊₃₂ᵗᵈ + ϕ₋₃₂ = getidx(space, Φ_field, idx - half - 1, hidx) + ϕ₋₁₂ = getidx(space, Φ_field, idx - half, hidx) + ϕ₊₁₂ = getidx(space, Φ_field, idx + half, hidx) + ϕ₊₃₂ = getidx(space, Φ_field, idx + half + 1, hidx) + ϕ₋₃₂ᵗᵈ = getidx(space, Φᵗᵈ_field, idx - half - 1, hidx) + ϕ₋₁₂ᵗᵈ = getidx(space, Φᵗᵈ_field, idx - half, hidx) + ϕ₊₁₂ᵗᵈ = getidx(space, Φᵗᵈ_field, idx + half, hidx) + ϕ₊₃₂ᵗᵈ = getidx(space, Φᵗᵈ_field, idx + half + 1, hidx) + + lg₋₁ = Geometry.LocalGeometry(space, idx - 1, hidx) + lg = Geometry.LocalGeometry(space, idx, hidx) + lg₊₁ = Geometry.LocalGeometry(space, idx + 1, hidx) + A₋₁ = Geometry.contravariant3(getidx(space, A_field, idx - 1, hidx), lg₋₁) + A = 
Geometry.contravariant3(getidx(space, A_field, idx, hidx), lg) + A₊₁ = Geometry.contravariant3(getidx(space, A_field, idx + 1, hidx), lg₊₁) - return Geometry.Contravariant3Vector( - fct_zalesak( - Aⱼ₋₁₂, - Aⱼ₊₁₂, - Aⱼ₊₃₂, - ϕⱼ₋₁, - ϕⱼ, - ϕⱼ₊₁, - ϕⱼ₊₂, - ϕⱼ₋₁ᵗᵈ, - ϕⱼᵗᵈ, - ϕⱼ₊₁ᵗᵈ, - ϕⱼ₊₂ᵗᵈ, - ), + # 𝒮5.4.2 (1) Durran (5.32) Zalesak's cosmetic correction + # which is usually omitted but used in Durran's textbook + # implementation of the flux corrected transport method. + # (Textbook suggests mixed results in 3 reported scenarios) + A = ifelse( + max( + A * (ϕ₊₁₂ᵗᵈ - ϕ₋₁₂ᵗᵈ), + min(A * (ϕ₊₃₂ᵗᵈ - ϕ₊₁₂ᵗᵈ), A * (ϕ₋₁₂ᵗᵈ - ϕ₋₃₂ᵗᵈ)), + ) >= 0, + A, + zero(A), ) + + P₋₁₂⁻ = max(0, A) - min(0, A₋₁) + P₋₁₂⁺ = max(0, A₋₁) - min(0, A) + P₊₁₂⁻ = max(0, A₊₁) - min(0, A) + P₊₁₂⁺ = max(0, A) - min(0, A₊₁) + + # 𝒮5.4.2 (2) + # If flow is nondivergent, ϕᵗᵈ are not needed in the formulae below + ϕ₋₁₂ᵐᵃˣ = max(ϕ₋₃₂, ϕ₋₁₂, ϕ₊₁₂, ϕ₋₃₂ᵗᵈ, ϕ₋₁₂ᵗᵈ, ϕ₊₁₂ᵗᵈ) + ϕ₋₁₂ᵐⁱⁿ = min(ϕ₋₃₂, ϕ₋₁₂, ϕ₊₁₂, ϕ₋₃₂ᵗᵈ, ϕ₋₁₂ᵗᵈ, ϕ₊₁₂ᵗᵈ) + ϕ₊₁₂ᵐᵃˣ = max(ϕ₋₁₂, ϕ₊₁₂, ϕ₊₃₂, ϕ₋₁₂ᵗᵈ, ϕ₊₁₂ᵗᵈ, ϕ₊₃₂ᵗᵈ) + ϕ₊₁₂ᵐⁱⁿ = min(ϕ₋₁₂, ϕ₊₁₂, ϕ₊₃₂, ϕ₋₁₂ᵗᵈ, ϕ₊₁₂ᵗᵈ, ϕ₊₃₂ᵗᵈ) + + # Zalesak also requires, in equation (5.33) Δx/Δt, which for the + # reference element we may assume Δζ = 1 between interfaces + R₋₁₂⁻ = ifelse(P₋₁₂⁻ > 0, min(1, (ϕ₋₁₂ᵗᵈ - ϕ₋₁₂ᵐⁱⁿ) / P₋₁₂⁻), zero(A)) + R₋₁₂⁺ = ifelse(P₋₁₂⁺ > 0, min(1, (ϕ₋₁₂ᵐᵃˣ - ϕ₋₁₂ᵗᵈ) / P₋₁₂⁺), zero(A)) + R₊₁₂⁻ = ifelse(P₊₁₂⁻ > 0, min(1, (ϕ₊₁₂ᵗᵈ - ϕ₊₁₂ᵐⁱⁿ) / P₊₁₂⁻), zero(A)) + R₊₁₂⁺ = ifelse(P₊₁₂⁺ > 0, min(1, (ϕ₊₁₂ᵐᵃˣ - ϕ₊₁₂ᵗᵈ) / P₊₁₂⁺), zero(A)) + + A_fct = ifelse(A >= 0, min(R₊₁₂⁺, R₋₁₂⁻), min(R₋₁₂⁺, R₊₁₂⁻)) * A + return Geometry.Contravariant3Vector(A_fct) end boundary_width(::FCTZalesak, ::AbstractBoundaryCondition) = 2 @@ -2164,6 +2056,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. 
""" struct RZeroLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::RZeroLimiter) = zero(r) """ U = RHalfLimiter(;boundaries) @@ -2173,6 +2066,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. """ struct RHalfLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::RHalfLimiter) = one(r) / 2 """ U = RMaxLimiter(;boundaries) @@ -2182,6 +2076,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. """ struct RMaxLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::RMaxLimiter) = one(r) """ U = MinModLimiter(;boundaries) @@ -2191,6 +2086,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. """ struct MinModLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::MinModLimiter) = max(0, min(1, r)) """ U = KorenLimiter(;boundaries) @@ -2200,6 +2096,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. """ struct KorenLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::KorenLimiter) = max(0, min(2r, (1 + 2r) / 3, 2)) """ U = SuperbeeLimiter(;boundaries) @@ -2209,6 +2106,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. """ struct SuperbeeLimiter <: AbstractTVDSlopeLimiter end +limiter_coeff(r, ::SuperbeeLimiter) = max(0, min(1, r), min(2, r)) """ U = MonotonizedCentralLimiter(;boundaries) @@ -2218,34 +2116,7 @@ A subtype of [`AbstractTVDSlopeLimiter`](@ref) limiter. See `TVDLimitedFluxC2F` for the general formulation. 
""" struct MonotonizedCentralLimiter <: AbstractTVDSlopeLimiter end - -@inline function compute_limiter_coeff(r, ::RZeroLimiter) - return zero(eltype(r)) -end - -@inline function compute_limiter_coeff(r, ::RHalfLimiter) - return one(eltype(r)) * 1 / 2 -end - -@inline function compute_limiter_coeff(r, ::RMaxLimiter) - return one(eltype(r)) -end - -@inline function compute_limiter_coeff(r, ::MinModLimiter) - return max(zero(eltype(r)), min(one(eltype(r)), r)) -end - -@inline function compute_limiter_coeff(r, ::KorenLimiter) - return max(zero(eltype(r)), min(2r, min(1 / 3 + 2r / 3, 2))) -end - -@inline function compute_limiter_coeff(r, ::SuperbeeLimiter) - return max(zero(eltype(r)), min(one(eltype(r)), r), min(2, r)) -end - -@inline function compute_limiter_coeff(r, ::MonotonizedCentralLimiter) - return max(zero(eltype(r)), min(2r, (1 + r) / 2, 2)) -end +limiter_coeff(r, ::MonotonizedCentralLimiter) = max(0, min(2r, (1 + r) / 2, 2)) """ TVDLimitedFluxC2F{BCS, M} <: AdvectionOperator @@ -2305,15 +2176,6 @@ return_space( u_space::AllFaceFiniteDifferenceSpace, ) = A_space -function tvd_limited_flux(Aⱼ₋₁₂, Aⱼ₊₁₂, ϕⱼ₋₁, ϕⱼ, ϕⱼ₊₁, ϕⱼ₊₂, rⱼ₊₁₂, constraint) - stable_zero = zero(eltype(Aⱼ₊₁₂)) - stable_one = one(eltype(Aⱼ₊₁₂)) - Cⱼ₊₁₂ = compute_limiter_coeff(rⱼ₊₁₂, constraint) - @assert Cⱼ₊₁₂ <= 2 - @assert Cⱼ₊₁₂ >= 0 - return Cⱼ₊₁₂ * Aⱼ₊₁₂ -end - stencil_interior_width(::TVDLimitedFluxC2F, A_space, Φ_space, u_space) = ((-1, 1), (-half - 1, half + 1), (-1, +1)) @@ -2326,38 +2188,20 @@ Base.@propagate_inbounds function stencil_interior( Φ_field, 𝓊_field, ) - # cell center variables - ϕⱼ₋₁ = getidx(space, Φ_field, idx - half - 1, hidx) - ϕⱼ = getidx(space, Φ_field, idx - half, hidx) - ϕⱼ₊₁ = getidx(space, Φ_field, idx + half, hidx) - ϕⱼ₊₂ = getidx(space, Φ_field, idx + half + 1, hidx) - 𝓊 = Geometry.contravariant3( - getidx(space, 𝓊_field, idx, hidx), - Geometry.LocalGeometry(space, idx, hidx), - ) - # cell face variables - Aⱼ₊₁₂ = Geometry.contravariant3( - getidx(space, 
A_field, idx, hidx), - Geometry.LocalGeometry(space, idx, hidx), - ) - Aⱼ₋₁₂ = Geometry.contravariant3( - getidx(space, A_field, idx - 1, hidx), - Geometry.LocalGeometry(space, idx - 1, hidx), - ) - # See filter options below - rⱼ₊₁₂ = compute_slope_ratio(ϕⱼ, ϕⱼ₋₁, ϕⱼ₊₁, ϕⱼ₊₂, 𝓊) + ϕ₋₃₂ = getidx(space, Φ_field, idx - half - 1, hidx) + ϕ₋₁₂ = getidx(space, Φ_field, idx - half, hidx) + ϕ₊₁₂ = getidx(space, Φ_field, idx + half, hidx) + ϕ₊₃₂ = getidx(space, Φ_field, idx + half + 1, hidx) - return Geometry.Contravariant3Vector( - tvd_limited_flux(Aⱼ₋₁₂, Aⱼ₊₁₂, ϕⱼ₋₁, ϕⱼ, ϕⱼ₊₁, ϕⱼ₊₂, rⱼ₊₁₂, op.method), - ) -end + lg = Geometry.LocalGeometry(space, idx, hidx) + 𝓊 = Geometry.contravariant3(getidx(space, 𝓊_field, idx, hidx), lg) + A = Geometry.contravariant3(getidx(space, A_field, idx, hidx), lg) -@inline function compute_slope_ratio(ϕⱼ, ϕⱼ₋₁, ϕⱼ₊₁, ϕⱼ₊₂, 𝓊) - if 𝓊 >= 0 - return (ϕⱼ - ϕⱼ₋₁) / (ϕⱼ₊₁ - ϕⱼ + eps(eltype(ϕⱼ))) - else - return (ϕⱼ₊₂ - ϕⱼ₊₁) / (ϕⱼ₊₁ - ϕⱼ + eps(eltype(ϕⱼ))) - end + Δϕ = ϕ₊₁₂ - ϕ₋₁₂ + eps(typeof(ϕ₋₁₂)) + # Δϕ_clipped = sign(Δϕ) * max(abs(Δϕ), eps(typeof(Δϕ))) + r = ifelse(𝓊 >= 0, ϕ₋₁₂ - ϕ₋₃₂, ϕ₊₃₂ - ϕ₊₁₂) / Δϕ # Δϕ_clipped + + return Geometry.Contravariant3Vector(limiter_coeff(r, op.method) * A) end boundary_width(::TVDLimitedFluxC2F, ::AbstractBoundaryCondition) = 2 @@ -2436,8 +2280,8 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, velocity, idx, hidx), Geometry.LocalGeometry(space, idx, hidx), ) - ∂θ₃ = RecursiveApply.rdiv(θ⁺ ⊟ θ⁻, 2) - return w³ ⊠ ∂θ₃ + ∂θ₃ = (θ⁺ - θ⁻) / 2 + return w³ * ∂θ₃ end boundary_width(::AdvectionF2F, ::AbstractBoundaryCondition) = 1 @@ -2502,9 +2346,9 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - ∂θ₃⁻ = θ ⊟ θ⁻ - return RecursiveApply.rdiv((w³⁺ ⊠ ∂θ₃⁺) ⊞ (w³⁻ ⊠ ∂θ₃⁻), 2) + ∂θ₃⁺ = θ⁺ - θ + ∂θ₃⁻ = θ - θ⁻ + return (w³⁺ * ∂θ₃⁺ + w³⁻ * ∂θ₃⁻) / 2 end boundary_width(::AdvectionC2C, 
::AbstractBoundaryCondition) = 1 @@ -2529,9 +2373,9 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - ∂θ₃⁻ = 2 ⊠ (θ ⊟ θ⁻) - return RecursiveApply.rdiv((w³⁺ ⊠ ∂θ₃⁺) ⊞ (w³⁻ ⊠ ∂θ₃⁻), 2) + ∂θ₃⁺ = θ⁺ - θ + ∂θ₃⁻ = 2 * (θ - θ⁻) + return (w³⁺ * ∂θ₃⁺ + w³⁻ * ∂θ₃⁻) / 2 end Base.@propagate_inbounds function stencil_right_boundary( ::AdvectionC2C, @@ -2554,9 +2398,9 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁺ = 2 ⊠ (θ⁺ ⊟ θ) - ∂θ₃⁻ = θ ⊟ θ⁻ - return RecursiveApply.rdiv((w³⁺ ⊠ ∂θ₃⁺) ⊞ (w³⁻ ⊠ ∂θ₃⁻), 2) + ∂θ₃⁺ = 2 * (θ⁺ - θ) + ∂θ₃⁻ = θ - θ⁻ + return (w³⁺ * ∂θ₃⁺ + w³⁻ * ∂θ₃⁻) / 2 end Base.@propagate_inbounds function stencil_left_boundary( @@ -2575,8 +2419,8 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, velocity, idx + half, hidx), Geometry.LocalGeometry(space, idx + half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - return (w³⁺ ⊠ ∂θ₃⁺) + ∂θ₃⁺ = θ⁺ - θ + return (w³⁺ * ∂θ₃⁺) end Base.@propagate_inbounds function stencil_right_boundary( ::AdvectionC2C, @@ -2594,8 +2438,8 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁻ = θ ⊟ θ⁻ - return (w³⁻ ⊠ ∂θ₃⁻) + ∂θ₃⁻ = θ - θ⁻ + return (w³⁻ * ∂θ₃⁻) end """ @@ -2651,9 +2495,9 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - ∂θ₃⁻ = θ ⊟ θ⁻ - return (abs(w³⁺) ⊠ ∂θ₃⁺) ⊟ (abs(w³⁻) ⊠ ∂θ₃⁻) + ∂θ₃⁺ = θ⁺ - θ + ∂θ₃⁻ = θ - θ⁻ + return abs(w³⁺) * ∂θ₃⁺ - abs(w³⁻) * ∂θ₃⁻ end boundary_width(::FluxCorrectionC2C, ::AbstractBoundaryCondition) = 1 @@ -2673,8 +2517,8 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, velocity, idx + half, hidx), 
Geometry.LocalGeometry(space, idx + half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - return (abs(w³⁺) ⊠ ∂θ₃⁺) + ∂θ₃⁺ = θ⁺ - θ + return abs(w³⁺) * ∂θ₃⁺ end Base.@propagate_inbounds function stencil_right_boundary( ::FluxCorrectionC2C, @@ -2692,8 +2536,8 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁻ = θ ⊟ θ⁻ - return ⊟(abs(w³⁻) ⊠ ∂θ₃⁻) + ∂θ₃⁻ = θ - θ⁻ + return -abs(w³⁻) * ∂θ₃⁻ end """ @@ -2749,9 +2593,9 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - ∂θ₃⁻ = θ ⊟ θ⁻ - return (abs(w³⁺) ⊠ ∂θ₃⁺) ⊟ (abs(w³⁻) ⊠ ∂θ₃⁻) + ∂θ₃⁺ = θ⁺ - θ + ∂θ₃⁻ = θ - θ⁻ + return abs(w³⁺) * ∂θ₃⁺ - abs(w³⁻) * ∂θ₃⁻ end boundary_width(::FluxCorrectionF2F, ::AbstractBoundaryCondition) = 1 @@ -2771,8 +2615,8 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, velocity, idx + half, hidx), Geometry.LocalGeometry(space, idx + half, hidx), ) - ∂θ₃⁺ = θ⁺ ⊟ θ - return (abs(w³⁺) ⊠ ∂θ₃⁺) + ∂θ₃⁺ = θ⁺ - θ + return abs(w³⁺) * ∂θ₃⁺ end Base.@propagate_inbounds function stencil_right_boundary( ::FluxCorrectionF2F, @@ -2790,8 +2634,8 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, velocity, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - ∂θ₃⁻ = θ ⊟ θ⁻ - return ⊟(abs(w³⁻) ⊠ ∂θ₃⁻) + ∂θ₃⁻ = θ - θ⁻ + return -abs(w³⁻) * ∂θ₃⁻ end @@ -2905,7 +2749,7 @@ Base.@propagate_inbounds function stencil_interior( arg, ) Geometry.Covariant3Vector(1) ⊗ ( - getidx(space, arg, idx + half, hidx) ⊟ + getidx(space, arg, idx + half, hidx) - getidx(space, arg, idx - half, hidx) ) end @@ -2923,7 +2767,7 @@ Base.@propagate_inbounds function stencil_left_boundary( ) @assert idx == left_center_boundary_idx(space) Geometry.Covariant3Vector(1) ⊗ ( - getidx(space, arg, idx + half, hidx) ⊟ + getidx(space, arg, idx + half, hidx) - getidx(space, bc.val, nothing, 
hidx) ) end @@ -2937,7 +2781,7 @@ Base.@propagate_inbounds function stencil_right_boundary( ) @assert idx == right_center_boundary_idx(space) Geometry.Covariant3Vector(1) ⊗ ( - getidx(space, bc.val, nothing, hidx) ⊟ + getidx(space, bc.val, nothing, hidx) - getidx(space, arg, idx - half, hidx) ) end @@ -3017,7 +2861,7 @@ Base.@propagate_inbounds function stencil_interior( arg, ) Geometry.Covariant3Vector(1) ⊗ ( - getidx(space, arg, idx + half, hidx) ⊟ + getidx(space, arg, idx + half, hidx) - getidx(space, arg, idx - half, hidx) ) end @@ -3034,7 +2878,7 @@ Base.@propagate_inbounds function stencil_left_boundary( @assert idx == left_face_boundary_idx(space) # ∂x[i] = 2(∂x[i + half] - val) Geometry.Covariant3Vector(2) ⊗ ( - getidx(space, arg, idx + half, hidx) ⊟ + getidx(space, arg, idx + half, hidx) - getidx(space, bc.val, nothing, hidx) ) end @@ -3048,7 +2892,7 @@ Base.@propagate_inbounds function stencil_right_boundary( ) @assert idx == right_face_boundary_idx(space) Geometry.Covariant3Vector(2) ⊗ ( - getidx(space, bc.val, nothing, hidx) ⊟ + getidx(space, bc.val, nothing, hidx) - getidx(space, arg, idx - half, hidx) ) end @@ -3156,7 +3000,7 @@ Base.@propagate_inbounds function stencil_interior( Geometry.LocalGeometry(space, idx, hidx), ) return Geometry.Covariant3Vector(1) ⊗ - ((1 - sign(v)) / 2 ⊠ a⁺ + sign(v) ⊠ a - (1 + sign(v)) / 2 ⊠ a⁻) + ((1 - sign(v)) / 2 * a⁺ + sign(v) * a - (1 + sign(v)) / 2 * a⁻) end boundary_width(::UpwindBiasedGradient, ::AbstractBoundaryCondition) = 1 @@ -3171,7 +3015,7 @@ Base.@propagate_inbounds function stencil_left_boundary( @assert idx == left_face_boundary_idx(space) a⁺ = getidx(space, arg, idx + 1, hidx) a = getidx(space, arg, idx, hidx) - return Geometry.Covariant3Vector(1) ⊗ (a⁺ ⊟ a) + return Geometry.Covariant3Vector(1) ⊗ (a⁺ - a) end Base.@propagate_inbounds function stencil_right_boundary( ::UpwindBiasedGradient, @@ -3184,7 +3028,7 @@ Base.@propagate_inbounds function stencil_right_boundary( @assert idx == 
right_face_boundary_idx(space) a = getidx(space, arg, idx, hidx) a⁻ = getidx(space, arg, idx - 1, hidx) - return Geometry.Covariant3Vector(1) ⊗ (a ⊟ a⁻) + return Geometry.Covariant3Vector(1) ⊗ (a - a⁻) end abstract type DivergenceOperator <: FiniteDifferenceOperator end @@ -3253,7 +3097,7 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, arg, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - (Ju³₊ ⊟ Ju³₋) ⊠ local_geometry.invJ + (Ju³₊ - Ju³₋) * local_geometry.invJ end boundary_width(::DivergenceF2C, ::AbstractBoundaryCondition) = 0 @@ -3276,7 +3120,7 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, bc.val, nothing, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - (Ju³₊ ⊟ Ju³₋) ⊠ local_geometry.invJ + (Ju³₊ - Ju³₋) * local_geometry.invJ end Base.@propagate_inbounds function stencil_right_boundary( ::DivergenceF2C, @@ -3296,7 +3140,7 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, arg, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - (Ju³₊ ⊟ Ju³₋) ⊠ local_geometry.invJ + (Ju³₊ - Ju³₋) * local_geometry.invJ end boundary_width(::DivergenceF2C, ::SetDivergence) = 1 @@ -3425,7 +3269,7 @@ Base.@propagate_inbounds function stencil_interior( getidx(space, arg, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - (Ju³₊ ⊟ Ju³₋) ⊠ local_geometry.invJ + (Ju³₊ - Ju³₋) * local_geometry.invJ end boundary_width(::DivergenceC2F, ::AbstractBoundaryCondition) = 1 @@ -3448,7 +3292,7 @@ Base.@propagate_inbounds function stencil_left_boundary( getidx(space, bc.val, nothing, hidx), local_geometry, ) - (Ju³₊ ⊟ Ju³) ⊠ (2 * local_geometry.invJ) + (Ju³₊ - Ju³) * (2 * local_geometry.invJ) end Base.@propagate_inbounds function stencil_right_boundary( ::DivergenceC2F, @@ -3468,7 +3312,7 @@ Base.@propagate_inbounds function stencil_right_boundary( getidx(space, arg, idx - half, hidx), Geometry.LocalGeometry(space, idx - half, hidx), ) - (Ju³ ⊟ Ju³₋) ⊠ 
(2 * local_geometry.invJ) + (Ju³ - Ju³₋) * (2 * local_geometry.invJ) end # left / right SetDivergence boundary conditions @@ -3770,8 +3614,7 @@ end @inline function should_call_left_boundary(idx, space, op, args...) Topologies.isperiodic(space) && return false loc = left_boundary_window(space) - return Operators.has_boundary(op, loc) && - idx < Operators.left_interior_idx( + return idx < Operators.left_interior_idx( space, op, Operators.get_boundary(op, loc), @@ -3782,8 +3625,7 @@ end @inline function should_call_right_boundary(idx, space, op, args...) Topologies.isperiodic(space) && return false loc = right_boundary_window(space) - return Operators.has_boundary(op, loc) && - idx > Operators.right_interior_idx( + return idx > Operators.right_interior_idx( space, op, Operators.get_boundary(op, loc), @@ -3845,20 +3687,6 @@ Base.@propagate_inbounds function getidx( end end -@inline getidx_return_type(scalar::Tuple{<:Any}) = eltype(scalar) -@inline getidx_return_type(scalar::Ref) = eltype(scalar) -@inline getidx_return_type(x::T) where {T} = T -@inline getidx_return_type(f::Fields.Field) = eltype(f) - -@inline getidx_return_type(bc::Base.Broadcast.Broadcasted) = - Base.promote_op(bc.f, map(getidx_return_type, bc.args)...) - -@inline getidx_return_type(op::AbstractOperator, args...) = - stencil_return_type(bc.op, bc.args...) - -@inline getidx_return_type(bc::StencilBroadcasted) = - stencil_return_type(bc.op, bc.args...) 
- # broadcasting a ColumnStencilStyle gives the StencilBroadcasted's style Base.Broadcast.BroadcastStyle( ::Type{<:StencilBroadcasted{Style}}, @@ -3910,15 +3738,14 @@ Base.@propagate_inbounds function getidx( return @inbounds field_data[CartesianIndex(i, j, 1, v, h)] end - # unwap boxed scalars @inline getidx(parent_space, scalar::Tuple{T}, idx, hidx) where {T} = scalar[1] @inline getidx(parent_space, scalar::Ref, idx, hidx) = scalar[] @inline getidx(parent_space, field::Fields.PointField, idx, hidx) = field[] @inline getidx(parent_space, field::Fields.PointField, idx) = field[] -# recursive fallback for scalar, just return -@inline getidx(parent_space, scalar, idx, hidx) = scalar +# enable automatic nested broadcasting over single-valued boundary conditions +@inline getidx(parent_space, scalar, idx, hidx) = add_auto_broadcasters(scalar) # getidx error fallbacks @noinline inferred_getidx_error(idx_type::Type, space_type::Type) = @@ -3979,7 +3806,12 @@ function Base.Broadcast.broadcasted( # TODO: we should probably disallow this, as it # may help with latency. FT = Spaces.undertype(axes(StencilBroadcasted{Style}(op, args))) - StencilBroadcasted{Style}(promote_bcs(op, FT), args) + args′ = + unrolled_map(args) do arg + is_auto_broadcastable(eltype(arg)) ? 
+ Base.Broadcast.broadcasted(add_auto_broadcasters, arg) : arg + end + return StencilBroadcasted{Style}(promote_bcs(op, FT), args′) end # check that inferred output field space is equal to dest field space @@ -4030,17 +3862,6 @@ if hasfield(Method, :recursion_relation) end end -function Base.similar( - bc::Base.Broadcast.Broadcasted{S}, - ::Type{Eltype}, -) where {Eltype, S <: AbstractStencilStyle} - sp = axes(bc) - return Field(Eltype, sp) -end - -Base.similar(bc::Base.Broadcast.Broadcasted{<:AbstractStencilStyle}) = - Base.similar(bc, eltype(bc)) - function _serial_copyto!(field_out::Field, bc, Ni::Int, Nj::Int, Nh::Int) space = axes(field_out) bounds = window_bounds(space, bc) @@ -4138,17 +3959,6 @@ Base.@propagate_inbounds function apply_stencil!( end return field_out end -# Compute slope ratio 𝜃 and limiter coefficient 𝜙 -#𝜃 = compute_slope_ratio(a⁻, a⁻⁻, a⁺, a⁺⁺, v) -#𝜙 = compute_limiter_coeff(𝜃, method) - - -#@assert 0 <= 𝜙 <= 2 -#if v >= 0 -# return v ⊠ (a⁻ ⊞ RecursiveApply.rdiv((a⁺ - a⁻) ⊠ 𝜙 ,2)) -#else -# return v ⊠ (a⁺ ⊟ RecursiveApply.rdiv((a⁺ - a⁻) ⊠ 𝜙 ,2)) # Current working solution -#end """ fd_shmem_is_supported(bc::Base.Broadcast.AbstractBroadcasted) diff --git a/src/Operators/integrals.jl b/src/Operators/integrals.jl index 13df0d18d6..20faaa8ed9 100644 --- a/src/Operators/integrals.jl +++ b/src/Operators/integrals.jl @@ -1,7 +1,8 @@ -import ..RecursiveApply: rzero, ⊠, ⊞ import RootSolvers import ClimaComms +broadcast_zero(field) = zero(eltype(Base.broadcastable(field))) + """ column_integral_definite!(ϕ_top, ᶜ∂ϕ∂z, [ϕ_bot]) @@ -12,14 +13,14 @@ area differential `J/Δz`, with `J` denoting the metric Jacobian. The input `ᶜ∂ϕ∂z` should be a cell-center `Field` or `AbstractBroadcasted`, and the output `ϕ_top` should be a horizontal `Field`. The default value of `ϕ_bot` is 0. 
""" -function column_integral_definite!(ϕ_top, ᶜ∂ϕ∂z, ϕ_bot = rzero(eltype(ϕ_top))) +function column_integral_definite!(ϕ_top, ᶜ∂ϕ∂z, ϕ_bot = broadcast_zero(ϕ_top)) ᶜJ = Fields.local_geometry_field(axes(ᶜ∂ϕ∂z)).J f_space = Spaces.face_space(axes(ᶜ∂ϕ∂z)) J_bot = Fields.level(Fields.local_geometry_field(f_space).J, half) Δz_bot = Fields.level(Fields.Δz_field(f_space), half) ΔA_bot = Base.broadcasted(/, J_bot, Δz_bot) - ᶜΔϕ = Base.broadcasted(⊠, ᶜ∂ϕ∂z, Base.broadcasted(/, ᶜJ, ΔA_bot)) - column_reduce!(⊞, ϕ_top, ᶜΔϕ; init = ϕ_bot) + ᶜΔϕ = Base.broadcasted(*, ᶜ∂ϕ∂z, Base.broadcasted(/, ᶜJ, ΔA_bot)) + column_reduce!(+, ϕ_top, ᶜΔϕ; init = ϕ_bot) end """ @@ -43,19 +44,19 @@ is used, `ΔA = ΔA_{bot}` at all values of `z`, and the output `ᶠϕ` satisfie is used, the vertical gradient is replaced with an area-weighted gradient. The default value of `ϕ_bot` is 0, and the default value of `rtol` is 0.001. """ -function column_integral_indefinite!(ᶠϕ, ᶜ∂ϕ∂z, ϕ_bot = rzero(eltype(ᶠϕ))) +function column_integral_indefinite!(ᶠϕ, ᶜ∂ϕ∂z, ϕ_bot = broadcast_zero(ᶠϕ)) ᶜJ = Fields.local_geometry_field(axes(ᶜ∂ϕ∂z)).J J_bot = Fields.level(Fields.local_geometry_field(ᶠϕ).J, half) Δz_bot = Fields.level(Fields.Δz_field(ᶠϕ), half) ΔA_bot = Base.broadcasted(/, J_bot, Δz_bot) - ᶜΔϕ = Base.broadcasted(⊠, ᶜ∂ϕ∂z, Base.broadcasted(/, ᶜJ, ΔA_bot)) - column_accumulate!(⊞, ᶠϕ, ᶜΔϕ; init = ϕ_bot) + ᶜΔϕ = Base.broadcasted(*, ᶜ∂ϕ∂z, Base.broadcasted(/, ᶜJ, ΔA_bot)) + column_accumulate!(+, ᶠϕ, ᶜΔϕ; init = ϕ_bot) end function column_integral_indefinite!( ∂ϕ∂z::F, ᶠϕ, - ϕ_bot = eltype(ᶠϕ)(0), - rtol = eltype(ᶠϕ)(0.001), + ϕ_bot = broadcast_zero(ᶠϕ), + rtol = eltype(parent(ᶠϕ))(0.001), ) where {F <: Function} device = ClimaComms.device(ᶠϕ) c_space = Spaces.center_space(axes(ᶠϕ)) diff --git a/src/Operators/numericalflux.jl b/src/Operators/numericalflux.jl index 40fdeb2bd4..f0ae1ba03c 100644 --- a/src/Operators/numericalflux.jl +++ b/src/Operators/numericalflux.jl @@ -28,17 +28,20 @@ function 
add_numerical_flux_internal!(fn, dydt, args...) Nq = Quadratures.degrees_of_freedom(Spaces.quadrature_style(space)) topology = Spaces.topology(space) internal_surface_geometry = Spaces.grid(space).internal_surface_geometry + dydt_bc = Base.broadcastable(dydt) + args_bc = + map(arg -> arg isa Fields.Field ? Base.broadcastable(arg) : arg, args) for (iface, (elem⁻, face⁻, elem⁺, face⁺, reversed)) in enumerate(Topologies.interior_faces(topology)) internal_surface_geometry_slab = slab(internal_surface_geometry, iface) - arg_slabs⁻ = map(arg -> slab(Fields.todata(arg), elem⁻), args) - arg_slabs⁺ = map(arg -> slab(Fields.todata(arg), elem⁺), args) + arg_slabs⁻ = map(arg -> slab(Fields.todata(arg), elem⁻), args_bc) + arg_slabs⁺ = map(arg -> slab(Fields.todata(arg), elem⁺), args_bc) - dydt_slab⁻ = slab(Fields.field_values(dydt), elem⁻) - dydt_slab⁺ = slab(Fields.field_values(dydt), elem⁺) + dydt_slab⁻ = slab(Fields.field_values(dydt_bc), elem⁻) + dydt_slab⁺ = slab(Fields.field_values(dydt_bc), elem⁺) for q in 1:Nq sgeom⁻ = internal_surface_geometry_slab[slab_index(q)] @@ -46,24 +49,21 @@ function add_numerical_flux_internal!(fn, dydt, args...) i⁻, j⁻ = Topologies.face_node_index(face⁻, Nq, q, false) i⁺, j⁺ = Topologies.face_node_index(face⁺, Nq, q, reversed) - numflux⁻ = fn( - sgeom⁻.normal, - map( - slab -> - slab isa DataSlab2D ? slab[slab_index(i⁻, j⁻)] : slab, - arg_slabs⁻, - ), - map( - slab -> - slab isa DataSlab2D ? slab[slab_index(i⁺, j⁺)] : slab, - arg_slabs⁺, - ), + argvals⁻ = map( + slab -> slab isa DataSlab2D ? slab[slab_index(i⁻, j⁻)] : slab, + arg_slabs⁻, ) + argvals⁺ = map( + slab -> slab isa DataSlab2D ? 
slab[slab_index(i⁺, j⁺)] : slab, + arg_slabs⁺, + ) + numflux⁻ = + add_auto_broadcasters(fn(sgeom⁻.normal, argvals⁻, argvals⁺)) dydt_slab⁻[slab_index(i⁻, j⁻)] = - dydt_slab⁻[slab_index(i⁻, j⁻)] ⊟ (sgeom⁻.sWJ ⊠ numflux⁻) + dydt_slab⁻[slab_index(i⁻, j⁻)] - (sgeom⁻.sWJ * numflux⁻) dydt_slab⁺[slab_index(i⁺, j⁺)] = - dydt_slab⁺[slab_index(i⁺, j⁺)] ⊞ (sgeom⁻.sWJ ⊠ numflux⁻) + dydt_slab⁺[slab_index(i⁺, j⁺)] + (sgeom⁻.sWJ * numflux⁻) end end end @@ -78,9 +78,9 @@ struct CentralNumericalFlux{F} end function (fn::CentralNumericalFlux)(normal, argvals⁻, argvals⁺) - Favg = - RecursiveApply.rdiv(fn.fluxfn(argvals⁻...) ⊞ fn.fluxfn(argvals⁺...), 2) - return RecursiveApply.rmap(f -> f' * normal, Favg) + F⁻ = add_auto_broadcasters(fn.fluxfn(argvals⁻...)) + F⁺ = add_auto_broadcasters(fn.fluxfn(argvals⁺...)) + return ((F⁻ + F⁺) / 2)' * normal end """ @@ -96,10 +96,10 @@ end function (fn::RusanovNumericalFlux)(normal, argvals⁻, argvals⁺) y⁻ = argvals⁻[1] y⁺ = argvals⁺[1] - Favg = - RecursiveApply.rdiv(fn.fluxfn(argvals⁻...) ⊞ fn.fluxfn(argvals⁺...), 2) + F⁻ = add_auto_broadcasters(fn.fluxfn(argvals⁻...)) + F⁺ = add_auto_broadcasters(fn.fluxfn(argvals⁺...)) λ = max(fn.wavespeedfn(argvals⁻...), fn.wavespeedfn(argvals⁺...)) - return RecursiveApply.rmap(f -> f' * normal, Favg) ⊞ (λ / 2) ⊠ (y⁻ ⊟ y⁺) + return ((F⁻ + F⁺) / 2)' * normal + (λ / 2) * (y⁻ - y⁺) end @@ -108,6 +108,9 @@ function add_numerical_flux_boundary!(fn, dydt, args...) Nq = Quadratures.degrees_of_freedom(Spaces.quadrature_style(space)) topology = Spaces.topology(space) boundary_surface_geometries = Spaces.grid(space).boundary_surface_geometries + dydt_bc = Base.broadcastable(dydt) + args_bc = + map(arg -> arg isa Fields.Field ? Base.broadcastable(arg) : arg, args) for (iboundary, boundarytag) in enumerate(Topologies.boundary_tags(topology)) @@ -117,22 +120,19 @@ function add_numerical_flux_boundary!(fn, dydt, args...) 
surface_geometry_slab = slab(boundary_surface_geometries[iboundary], iface) - arg_slabs⁻ = map(arg -> slab(Fields.todata(arg), elem⁻), args) - dydt_slab⁻ = slab(Fields.field_values(dydt), elem⁻) + arg_slabs⁻ = map(arg -> slab(Fields.todata(arg), elem⁻), args_bc) + dydt_slab⁻ = slab(Fields.field_values(dydt_bc), elem⁻) for q in 1:Nq sgeom⁻ = boundary_surface_geometry_slab[slab_index(q)] i⁻, j⁻ = Topologies.face_node_index(face⁻, Nq, q, false) - numflux⁻ = fn( - sgeom⁻.normal, - map( - slab -> - slab isa DataSlab2D ? slab[slab_index(i⁻, j⁻)] : - slab, - arg_slabs⁻, - ), + argvals⁻ = map( + slab -> + slab isa DataSlab2D ? slab[slab_index(i⁻, j⁻)] : slab, + arg_slabs⁻, ) + numflux⁻ = add_auto_broadcasters(fn(sgeom⁻.normal, argvals⁻)) dydt_slab⁻[slab_index(i⁻, j⁻)] = - dydt_slab⁻[slab_index(i⁻, j⁻)] ⊟ (sgeom⁻.sWJ ⊠ numflux⁻) + dydt_slab⁻[slab_index(i⁻, j⁻)] - (sgeom⁻.sWJ * numflux⁻) end end end diff --git a/src/Operators/spectralelement.jl b/src/Operators/spectralelement.jl index da4d25b38f..93090b2010 100644 --- a/src/Operators/spectralelement.jl +++ b/src/Operators/spectralelement.jl @@ -111,11 +111,18 @@ function Base.Broadcast.broadcasted(op::SpectralElementOperator, args...) Base.Broadcast.broadcasted(style, op, args′...) end -Base.Broadcast.broadcasted( +function Base.Broadcast.broadcasted( ::SpectralStyle, op::SpectralElementOperator, args..., -) = SpectralBroadcasted{SpectralStyle}(op, args) +) + args′ = + unrolled_map(args) do arg + is_auto_broadcastable(eltype(arg)) ? + Base.Broadcast.broadcasted(add_auto_broadcasters, arg) : arg + end + return SpectralBroadcasted{SpectralStyle}(op, args′) +end Base.eltype(sbc::SpectralBroadcasted) = operator_return_eltype(sbc.op, map(eltype, sbc.args)...) 
@@ -175,16 +182,6 @@ function Base.Broadcast.instantiate( end end -function Base.similar( - bc::Base.Broadcast.Broadcasted{<:AbstractSpectralStyle}, - ::Type{Eltype}, -) where {Eltype} - space = axes(bc) - return Field(Eltype, space) -end - - - # Functions for SlabBlockSpectralStyle function Base.copyto!( out::Field, @@ -548,7 +545,7 @@ Divergence() = Divergence{()}() Divergence{()}(space) = Divergence{operator_axes(space)}() operator_return_eltype(op::Divergence{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(Geometry.divergence_result_type, S) + Geometry.divergence_result_type(S) function apply_operator(op::Divergence{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -563,20 +560,15 @@ function apply_operator(op::Divergence{(1,)}, space, slabidx, arg) ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) - Jv¹ = - local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - v, - ) + Jv¹ = local_geometry.J * Geometry.contravariant1(v, local_geometry) for ii in 1:Nq - out[slab_index(ii)] = out[slab_index(ii)] ⊞ (D[ii, i] ⊠ Jv¹) + out[slab_index(ii)] += D[ii, i] * Jv¹ end end @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i)] = - RecursiveApply.rmul(out[slab_index(i)], local_geometry.invJ) + out[slab_index(i)] *= local_geometry.invJ end return Field(SArray(out), space) end @@ -599,28 +591,19 @@ Base.@propagate_inbounds function apply_operator( ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) - Jv¹ = - local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - v, - ) + Jv¹ = local_geometry.J * Geometry.contravariant1(v, local_geometry) for ii in 1:Nq - out[slab_index(ii, j)] = out[slab_index(ii, j)] ⊞ (D[ii, i] ⊠ Jv¹) + out[slab_index(ii, j)] += D[ii, i] * Jv¹ end - Jv² = - 
local_geometry.J ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - v, - ) + Jv² = local_geometry.J * Geometry.contravariant2(v, local_geometry) for jj in 1:Nq - out[slab_index(i, jj)] = out[slab_index(i, jj)] ⊞ (D[jj, j] ⊠ Jv²) + out[slab_index(i, jj)] += D[jj, j] * Jv² end end @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i, j)] = - RecursiveApply.rmul(out[slab_index(i, j)], local_geometry.invJ) + out[slab_index(i, j)] *= local_geometry.invJ end return Field(SArray(out), space) end @@ -713,11 +696,12 @@ struct SplitDivergence{I} <: SpectralElementOperator{I} end SplitDivergence() = SplitDivergence{()}() SplitDivergence{()}(space) = SplitDivergence{operator_axes(space)}() -operator_return_eltype(::SplitDivergence{I}, ::Type{S1}, ::Type{S2}) where {I, S1, S2} = - Geometry.rmul_return_type( - RecursiveApply.rmaptype(Geometry.divergence_result_type, S1), - S2, - ) +operator_return_eltype( + ::SplitDivergence{I}, + ::Type{S1}, + ::Type{S2}, +) where {I, S1, S2} = + Geometry.mul_return_type(Geometry.divergence_result_type(S1), S2) function apply_operator(op::SplitDivergence{(1,)}, space, slabidx, arg1, arg2) FT = Spaces.undertype(space) @@ -732,11 +716,9 @@ function apply_operator(op::SplitDivergence{(1,)}, space, slabidx, arg1, arg2) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) + u = get_node(space, arg1, ij, slabidx) Ju1[slab_index(i)] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant1(u, local_geometry), - get_node(space, arg1, ij, slabidx), - ) + local_geometry.J * Geometry.contravariant1(u, local_geometry) psi[slab_index(i)] = get_node(space, arg2, ij, slabidx) end @@ -744,19 +726,19 @@ function apply_operator(op::SplitDivergence{(1,)}, space, slabidx, arg1, arg2) fill!(parent(out), zero(FT)) @inbounds for i in 1:Nq for j in 1:(i - 1) # loop over half the indices, 
since F1[i,j] = F1[j,i] - F1 = RecursiveApply.rdiv( - (Ju1[slab_index(i)] ⊞ Ju1[slab_index(j)]) ⊠ - (psi[slab_index(i)] ⊞ psi[slab_index(j)]), - 2, - ) - out[slab_index(i)] = out[slab_index(i)] ⊞ D[i, j] ⊠ F1 - out[slab_index(j)] = out[slab_index(j)] ⊞ D[j, i] ⊠ F1 + F1 = + ( + (Ju1[slab_index(i)] + Ju1[slab_index(j)]) * + (psi[slab_index(i)] + psi[slab_index(j)]) + ) / 2 + out[slab_index(i)] += D[i, j] * F1 + out[slab_index(j)] += D[j, i] * F1 end end @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i)] = out[slab_index(i)] ⊠ local_geometry.invJ + out[slab_index(i)] *= local_geometry.invJ end return Field(SArray(out), space) @@ -778,15 +760,9 @@ function apply_operator(op::SplitDivergence{(1, 2)}, space, slabidx, arg1, arg2) local_geometry = get_local_geometry(space, ij, slabidx) u = get_node(space, arg1, ij, slabidx) Ju1[slab_index(i, j)] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant1(u, local_geometry), - u, - ) + local_geometry.J * Geometry.contravariant1(u, local_geometry) Ju2[slab_index(i, j)] = - local_geometry.J ⊠ RecursiveApply.rmap( - u -> Geometry.contravariant2(u, local_geometry), - u, - ) + local_geometry.J * Geometry.contravariant2(u, local_geometry) psi[slab_index(i, j)] = get_node(space, arg2, ij, slabidx) end @@ -794,28 +770,28 @@ function apply_operator(op::SplitDivergence{(1, 2)}, space, slabidx, arg1, arg2) fill!(parent(out), zero(FT)) @inbounds for j in 1:Nq, i in 1:Nq for k in 1:(i - 1) # loop over half the indices, since F1[i,k] = F1[k,i] - F1 = RecursiveApply.rdiv( - (Ju1[slab_index(i, j)] ⊞ Ju1[slab_index(k, j)]) ⊠ - (psi[slab_index(i, j)] ⊞ psi[slab_index(k, j)]), - 2, - ) - out[slab_index(i, j)] = out[slab_index(i, j)] ⊞ D[i, k] ⊠ F1 - out[slab_index(k, j)] = out[slab_index(k, j)] ⊞ D[k, i] ⊠ F1 + F1 = + ( + (Ju1[slab_index(i, j)] + Ju1[slab_index(k, j)]) * + (psi[slab_index(i, j)] + psi[slab_index(k, j)]) + ) / 2 + out[slab_index(i, j)] 
+= D[i, k] * F1 + out[slab_index(k, j)] += D[k, i] * F1 end for k in 1:(j - 1) # loop over half the indices, since F2[j,k] = F2[k,j] - F2 = RecursiveApply.rdiv( - (Ju2[slab_index(i, j)] ⊞ Ju2[slab_index(i, k)]) ⊠ - (psi[slab_index(i, j)] ⊞ psi[slab_index(i, k)]), - 2, - ) - out[slab_index(i, j)] = out[slab_index(i, j)] ⊞ D[j, k] ⊠ F2 - out[slab_index(i, k)] = out[slab_index(i, k)] ⊞ D[k, j] ⊠ F2 + F2 = + ( + (Ju2[slab_index(i, j)] + Ju2[slab_index(i, k)]) * + (psi[slab_index(i, j)] + psi[slab_index(i, k)]) + ) / 2 + out[slab_index(i, j)] += D[j, k] * F2 + out[slab_index(i, k)] += D[k, j] * F2 end end @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i, j)] = out[slab_index(i, j)] ⊠ local_geometry.invJ + out[slab_index(i, j)] *= local_geometry.invJ end return Field(SArray(out), space) @@ -863,7 +839,7 @@ WeakDivergence() = WeakDivergence{()}() WeakDivergence{()}(space) = WeakDivergence{operator_axes(space)}() operator_return_eltype(::WeakDivergence{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(Geometry.divergence_result_type, S) + Geometry.divergence_result_type(S) function apply_operator(op::WeakDivergence{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -878,20 +854,15 @@ function apply_operator(op::WeakDivergence{(1,)}, space, slabidx, arg) ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) - WJv¹ = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - v, - ) + WJv¹ = local_geometry.WJ * Geometry.contravariant1(v, local_geometry) for ii in 1:Nq - out[slab_index(ii)] = out[slab_index(ii)] ⊞ (D[i, ii] ⊠ WJv¹) + out[slab_index(ii)] += D[i, ii] * WJv¹ end end @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i)] = - RecursiveApply.rdiv(out[slab_index(i)], ⊟(local_geometry.WJ)) + 
out[slab_index(i)] /= -local_geometry.WJ end return Field(SArray(out), space) end @@ -909,28 +880,19 @@ function apply_operator(op::WeakDivergence{(1, 2)}, space, slabidx, arg) ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) - WJv¹ = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant1(v, local_geometry), - v, - ) + WJv¹ = local_geometry.WJ * Geometry.contravariant1(v, local_geometry) for ii in 1:Nq - out[slab_index(ii, j)] = out[slab_index(ii, j)] ⊞ (D[i, ii] ⊠ WJv¹) + out[slab_index(ii, j)] += D[i, ii] * WJv¹ end - WJv² = - local_geometry.WJ ⊠ RecursiveApply.rmap( - v -> Geometry.contravariant2(v, local_geometry), - v, - ) + WJv² = local_geometry.WJ * Geometry.contravariant2(v, local_geometry) for jj in 1:Nq - out[slab_index(i, jj)] = out[slab_index(i, jj)] ⊞ (D[j, jj] ⊠ WJv²) + out[slab_index(i, jj)] += D[j, jj] * WJv² end end @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i, j)] = - RecursiveApply.rdiv(out[slab_index(i, j)], ⊟(local_geometry.WJ)) + out[slab_index(i, j)] /= -local_geometry.WJ end return Field(SArray(out), space) end @@ -962,7 +924,7 @@ Gradient() = Gradient{()}() Gradient{()}(space) = Gradient{operator_axes(space)}() operator_return_eltype(::Gradient{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(T -> Geometry.gradient_result_type(Val(I), T), S) + Geometry.gradient_result_type(Val(I), S) function apply_operator(op::Gradient{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -1004,11 +966,11 @@ Base.@propagate_inbounds function apply_operator( x = get_node(space, arg, ij, slabidx) for ii in 1:Nq ∂f∂ξ₁ = Geometry.Covariant12Vector(D[ii, i], zero(eltype(D))) ⊗ x - out[slab_index(ii, j)] = out[slab_index(ii, j)] ⊞ ∂f∂ξ₁ + out[slab_index(ii, j)] += ∂f∂ξ₁ end for jj in 1:Nq ∂f∂ξ₂ = Geometry.Covariant12Vector(zero(eltype(D)), D[jj, j]) ⊗ x - 
out[slab_index(i, jj)] = out[slab_index(i, jj)] ⊞ ∂f∂ξ₂ + out[slab_index(i, jj)] += ∂f∂ξ₂ end end return Field(SArray(out), space) @@ -1053,7 +1015,7 @@ WeakGradient() = WeakGradient{()}() WeakGradient{()}(space) = WeakGradient{operator_axes(space)}() operator_return_eltype(::WeakGradient{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(T -> Geometry.gradient_result_type(Val(I), T), S) + Geometry.gradient_result_type(Val(I), S) function apply_operator(op::WeakGradient{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -1068,17 +1030,17 @@ function apply_operator(op::WeakGradient{(1,)}, space, slabidx, arg) ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wx = W ⊠ get_node(space, arg, ij, slabidx) + Wx = W * get_node(space, arg, ij, slabidx) for ii in 1:Nq Dᵀ₁Wf = Geometry.Covariant1Vector(D[i, ii]) ⊗ Wx - out[slab_index(ii)] = out[slab_index(ii)] ⊟ Dᵀ₁Wf + out[slab_index(ii)] -= Dᵀ₁Wf end end @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - out[slab_index(i)] = RecursiveApply.rdiv(out[slab_index(i)], W) + out[slab_index(i)] /= W end return Field(SArray(out), space) end @@ -1097,21 +1059,21 @@ function apply_operator(op::WeakGradient{(1, 2)}, space, slabidx, arg) ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wx = W ⊠ get_node(space, arg, ij, slabidx) + Wx = W * get_node(space, arg, ij, slabidx) for ii in 1:Nq Dᵀ₁Wf = Geometry.Covariant12Vector(D[i, ii], zero(eltype(D))) ⊗ Wx - out[slab_index(ii, j)] = out[slab_index(ii, j)] ⊟ Dᵀ₁Wf + out[slab_index(ii, j)] -= Dᵀ₁Wf end for jj in 1:Nq Dᵀ₂Wf = Geometry.Covariant12Vector(zero(eltype(D)), D[j, jj]) ⊗ Wx - out[slab_index(i, jj)] = out[slab_index(i, jj)] ⊟ Dᵀ₂Wf + out[slab_index(i, jj)] -= Dᵀ₂Wf end end @inbounds for j in 1:Nq, i in 1:Nq ij = 
CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - out[slab_index(i, j)] = RecursiveApply.rdiv(out[slab_index(i, j)], W) + out[slab_index(i, j)] /= W end return Field(SArray(out), space) end @@ -1164,7 +1126,7 @@ Curl() = Curl{()}() Curl{()}(space) = Curl{operator_axes(space)}() operator_return_eltype(::Curl{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(T -> Geometry.curl_result_type(Val(I), T), S) + Geometry.curl_result_type(Val(I), S) function apply_operator(op::Curl{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -1182,9 +1144,8 @@ function apply_operator(op::Curl{(1,)}, space, slabidx, arg) v = get_node(space, arg, ij, slabidx) v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq - D₁v₃ = D[ii, i] ⊠ v₃ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ Geometry.Contravariant2Vector(⊟(D₁v₃)) + D₁v₃ = D[ii, i] * v₃ + out[slab_index(ii)] += Geometry.Contravariant2Vector(-D₁v₃) end end elseif RT <: Geometry.Contravariant3Vector @@ -1194,9 +1155,8 @@ function apply_operator(op::Curl{(1,)}, space, slabidx, arg) v = get_node(space, arg, ij, slabidx) v₂ = Geometry.covariant2(v, local_geometry) for ii in 1:Nq - D₁v₂ = D[ii, i] ⊠ v₂ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ Geometry.Contravariant3Vector(D₁v₂) + D₁v₂ = D[ii, i] * v₂ + out[slab_index(ii)] += Geometry.Contravariant3Vector(D₁v₂) end end elseif RT <: Geometry.Contravariant23Vector @@ -1207,11 +1167,10 @@ function apply_operator(op::Curl{(1,)}, space, slabidx, arg) v₂ = Geometry.covariant2(v, local_geometry) v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq - D₁v₃ = D[ii, i] ⊠ v₃ - D₁v₂ = D[ii, i] ⊠ v₂ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ - Geometry.Contravariant23Vector(⊟(D₁v₃), D₁v₂) + D₁v₃ = D[ii, i] * v₃ + D₁v₂ = D[ii, i] * v₂ + out[slab_index(ii)] += + Geometry.Contravariant23Vector(-D₁v₃, D₁v₂) end end else @@ -1220,8 +1179,7 @@ function apply_operator(op::Curl{(1,)}, space, 
slabidx, arg) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i)] = - RecursiveApply.rmul(out[slab_index(i)], local_geometry.invJ) + out[slab_index(i)] *= local_geometry.invJ end return Field(SArray(out), space) end @@ -1244,16 +1202,13 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) v = get_node(space, arg, ij, slabidx) v₁ = Geometry.covariant1(v, local_geometry) for jj in 1:Nq - D₂v₁ = D[jj, j] ⊠ v₁ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ - Geometry.Contravariant3Vector(⊟(D₂v₁)) + D₂v₁ = D[jj, j] * v₁ + out[slab_index(i, jj)] += Geometry.Contravariant3Vector(-D₂v₁) end v₂ = Geometry.covariant2(v, local_geometry) for ii in 1:Nq - D₁v₂ = D[ii, i] ⊠ v₂ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ Geometry.Contravariant3Vector(D₁v₂) + D₁v₂ = D[ii, i] * v₂ + out[slab_index(ii, j)] += Geometry.Contravariant3Vector(D₁v₂) end end # input data is a Covariant3Vector field @@ -1264,15 +1219,13 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) v = get_node(space, arg, ij, slabidx) v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq - D₁v₃ = D[ii, i] ⊠ v₃ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ - Geometry.Contravariant12Vector(zero(D₁v₃), ⊟(D₁v₃)) + D₁v₃ = D[ii, i] * v₃ + out[slab_index(ii, j)] += + Geometry.Contravariant12Vector(zero(D₁v₃), -D₁v₃) end for jj in 1:Nq - D₂v₃ = D[jj, j] ⊠ v₃ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ + D₂v₃ = D[jj, j] * v₃ + out[slab_index(i, jj)] += Geometry.Contravariant12Vector(D₂v₃, zero(D₂v₃)) end end @@ -1285,18 +1238,16 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) v₂ = Geometry.covariant2(v, local_geometry) v₃ = Geometry.covariant3(v, local_geometry) for ii in 1:Nq - D₁v₃ = D[ii, i] ⊠ v₃ - D₁v₂ = D[ii, i] ⊠ v₂ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ - Geometry.Contravariant123Vector(zero(D₁v₃), ⊟(D₁v₃), D₁v₂) + D₁v₃ = D[ii, i] * v₃ + D₁v₂ = 
D[ii, i] * v₂ + out[slab_index(ii, j)] += + Geometry.Contravariant123Vector(zero(D₁v₃), -D₁v₃, D₁v₂) end for jj in 1:Nq - D₂v₃ = D[jj, j] ⊠ v₃ - D₂v₁ = D[jj, j] ⊠ v₁ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ - Geometry.Contravariant123Vector(D₂v₃, zero(D₂v₃), ⊟(D₂v₁)) + D₂v₃ = D[jj, j] * v₃ + D₂v₁ = D[jj, j] * v₁ + out[slab_index(i, jj)] += + Geometry.Contravariant123Vector(D₂v₃, zero(D₂v₃), -D₂v₁) end end else @@ -1305,8 +1256,7 @@ function apply_operator(op::Curl{(1, 2)}, space, slabidx, arg) @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i, j)] = - RecursiveApply.rmul(out[slab_index(i, j)], local_geometry.invJ) + out[slab_index(i, j)] *= local_geometry.invJ end return Field(SArray(out), space) end @@ -1352,7 +1302,7 @@ WeakCurl() = WeakCurl{()}() WeakCurl{()}(space) = WeakCurl{operator_axes(space)}() operator_return_eltype(::WeakCurl{I}, ::Type{S}) where {I, S} = - RecursiveApply.rmaptype(T -> Geometry.curl_result_type(Val(I), T), S) + Geometry.curl_result_type(Val(I), S) function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) FT = Spaces.undertype(space) @@ -1370,11 +1320,10 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) + Wv₃ = W * Geometry.covariant3(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ Geometry.Contravariant2Vector(Dᵀ₁Wv₃) + Dᵀ₁Wv₃ = D[i, ii] * Wv₃ + out[slab_index(ii)] += Geometry.Contravariant2Vector(Dᵀ₁Wv₃) end end elseif RT <: Geometry.Contravariant3Vector @@ -1383,12 +1332,10 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * 
local_geometry.invJ - Wv₂ = W ⊠ Geometry.covariant2(v, local_geometry) + Wv₂ = W * Geometry.covariant2(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₂ = D[i, ii] ⊠ Wv₂ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ - Geometry.Contravariant3Vector(⊟(Dᵀ₁Wv₂)) + Dᵀ₁Wv₂ = D[i, ii] * Wv₂ + out[slab_index(ii)] += Geometry.Contravariant3Vector(-Dᵀ₁Wv₂) end end elseif RT <: Geometry.Contravariant23Vector @@ -1397,14 +1344,13 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) - Wv₂ = W ⊠ Geometry.covariant2(v, local_geometry) + Wv₃ = W * Geometry.covariant3(v, local_geometry) + Wv₂ = W * Geometry.covariant2(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ - Dᵀ₁Wv₂ = D[i, ii] ⊠ Wv₂ - out[slab_index(ii)] = - out[slab_index(ii)] ⊞ - Geometry.Contravariant23Vector(Dᵀ₁Wv₃, ⊟(Dᵀ₁Wv₂)) + Dᵀ₁Wv₃ = D[i, ii] * Wv₃ + Dᵀ₁Wv₂ = D[i, ii] * Wv₂ + out[slab_index(ii)] += + Geometry.Contravariant23Vector(Dᵀ₁Wv₃, -Dᵀ₁Wv₂) end end else @@ -1413,8 +1359,7 @@ function apply_operator(op::WeakCurl{(1,)}, space, slabidx, arg) @inbounds for i in 1:Nq ij = CartesianIndex((i,)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i)] = - RecursiveApply.rdiv(out[slab_index(i)], local_geometry.WJ) + out[slab_index(i)] /= local_geometry.WJ end return Field(SArray(out), space) end @@ -1436,19 +1381,17 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wv₁ = W ⊠ Geometry.covariant1(v, local_geometry) + Wv₁ = W * Geometry.covariant1(v, local_geometry) for jj in 1:Nq - Dᵀ₂Wv₁ = D[j, jj] ⊠ Wv₁ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ + Dᵀ₂Wv₁ = D[j, jj] * Wv₁ + out[slab_index(i, jj)] += 
Geometry.Contravariant3Vector(Dᵀ₂Wv₁) end - Wv₂ = W ⊠ Geometry.covariant2(v, local_geometry) + Wv₂ = W * Geometry.covariant2(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₂ = D[i, ii] ⊠ Wv₂ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ - Geometry.Contravariant3Vector(⊟(Dᵀ₁Wv₂)) + Dᵀ₁Wv₂ = D[i, ii] * Wv₂ + out[slab_index(ii, j)] += + Geometry.Contravariant3Vector(-Dᵀ₁Wv₂) end end # input data is a Covariant3Vector field @@ -1458,18 +1401,16 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) + Wv₃ = W * Geometry.covariant3(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ + Dᵀ₁Wv₃ = D[i, ii] * Wv₃ + out[slab_index(ii, j)] += Geometry.Contravariant12Vector(zero(Dᵀ₁Wv₃), Dᵀ₁Wv₃) end for jj in 1:Nq - Dᵀ₂Wv₃ = D[j, jj] ⊠ Wv₃ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ - Geometry.Contravariant12Vector(⊟(Dᵀ₂Wv₃), zero(Dᵀ₂Wv₃)) + Dᵀ₂Wv₃ = D[j, jj] * Wv₃ + out[slab_index(i, jj)] += + Geometry.Contravariant12Vector(-Dᵀ₂Wv₃, zero(Dᵀ₂Wv₃)) end end elseif RT <: Geometry.Contravariant123Vector @@ -1478,25 +1419,24 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) local_geometry = get_local_geometry(space, ij, slabidx) v = get_node(space, arg, ij, slabidx) W = local_geometry.WJ * local_geometry.invJ - Wv₁ = W ⊠ Geometry.covariant1(v, local_geometry) - Wv₂ = W ⊠ Geometry.covariant2(v, local_geometry) - Wv₃ = W ⊠ Geometry.covariant3(v, local_geometry) + Wv₁ = W * Geometry.covariant1(v, local_geometry) + Wv₂ = W * Geometry.covariant2(v, local_geometry) + Wv₃ = W * Geometry.covariant3(v, local_geometry) for ii in 1:Nq - Dᵀ₁Wv₃ = D[i, ii] ⊠ Wv₃ - Dᵀ₁Wv₂ = D[i, ii] ⊠ Wv₂ - out[slab_index(ii, j)] = - out[slab_index(ii, j)] ⊞ Geometry.Contravariant123Vector( - zero(Dᵀ₁Wv₃), - Dᵀ₁Wv₃, - 
⊟(Dᵀ₁Wv₂), - ) + Dᵀ₁Wv₃ = D[i, ii] * Wv₃ + Dᵀ₁Wv₂ = D[i, ii] * Wv₂ + out[slab_index(ii, j)] += Geometry.Contravariant123Vector( + zero(Dᵀ₁Wv₃), + Dᵀ₁Wv₃, + -Dᵀ₁Wv₂, + ) end for jj in 1:Nq - Dᵀ₂Wv₃ = D[j, jj] ⊠ Wv₃ - Dᵀ₂Wv₁ = D[j, jj] ⊠ Wv₁ - out[slab_index(i, jj)] = - out[slab_index(i, jj)] ⊞ Geometry.Contravariant123Vector( - ⊟(Dᵀ₂Wv₃), + Dᵀ₂Wv₃ = D[j, jj] * Wv₃ + Dᵀ₂Wv₁ = D[j, jj] * Wv₁ + out[slab_index(i, jj)] += + Geometry.Contravariant123Vector( + -Dᵀ₂Wv₃, zero(Dᵀ₂Wv₃), Dᵀ₂Wv₁, ) @@ -1508,8 +1448,7 @@ function apply_operator(op::WeakCurl{(1, 2)}, space, slabidx, arg) @inbounds for j in 1:Nq, i in 1:Nq ij = CartesianIndex((i, j)) local_geometry = get_local_geometry(space, ij, slabidx) - out[slab_index(i, j)] = - RecursiveApply.rdiv(out[slab_index(i, j)], local_geometry.WJ) + out[slab_index(i, j)] /= local_geometry.WJ end return Field(SArray(out), space) end @@ -1554,10 +1493,10 @@ function apply_operator(op::Interpolate{(1,)}, space_out, slabidx, arg) @inbounds for i in 1:Nq_out # manually inlined rmatmul with slab_getnode ij = CartesianIndex((1,)) - r = Imat[i, 1] ⊠ get_node(space_in, arg, ij, slabidx) + r = Imat[i, 1] * get_node(space_in, arg, ij, slabidx) for ii in 2:Nq_in ij = CartesianIndex((ii,)) - r = RecursiveApply.rmuladd( + r = muladd( Imat[i, ii], get_node(space_in, arg, ij, slabidx), r, @@ -1584,10 +1523,10 @@ function apply_operator(op::Interpolate{(1, 2)}, space_out, slabidx, arg) # manually inlined rmatmul1 with slab get_node # we do this to remove one allocated intermediate array ij = CartesianIndex((1, j)) - r = Imat[i, 1] ⊠ get_node(space_in, arg, ij, slabidx) + r = Imat[i, 1] * get_node(space_in, arg, ij, slabidx) for ii in 2:Nq_in ij = CartesianIndex((ii, j)) - r = RecursiveApply.rmuladd( + r = muladd( Imat[i, ii], get_node(space_in, arg, ij, slabidx), r, @@ -1645,19 +1584,19 @@ function apply_operator(op::Restrict{(1,)}, space_out, slabidx, arg) # manually inlined rmatmul with slab get_node ij = CartesianIndex((1,)) WJ = 
get_local_geometry(space_in, ij, slabidx).WJ - r = ImatT[i, 1] ⊠ (WJ ⊠ get_node(space_in, arg, ij, slabidx)) + r = ImatT[i, 1] * (WJ * get_node(space_in, arg, ij, slabidx)) for ii in 2:Nq_in ij = CartesianIndex((ii,)) WJ = get_local_geometry(space_in, ij, slabidx).WJ - r = RecursiveApply.rmuladd( + r = muladd( ImatT[i, ii], - WJ ⊠ get_node(space_in, arg, ij, slabidx), + WJ * get_node(space_in, arg, ij, slabidx), r, ) end ij_out = CartesianIndex((i,)) WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ - out[slab_index(i)] = RecursiveApply.rdiv(r, WJ_out) + out[slab_index(i)] = r / WJ_out end return Field(SArray(out), space_out) end @@ -1678,13 +1617,13 @@ function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) # manually inlined rmatmul1 with slab get_node ij = CartesianIndex((1, j)) WJ = get_local_geometry(space_in, ij, slabidx).WJ - r = ImatT[i, 1] ⊠ (WJ ⊠ get_node(space_in, arg, ij, slabidx)) + r = ImatT[i, 1] * (WJ * get_node(space_in, arg, ij, slabidx)) for ii in 2:Nq_in ij = CartesianIndex((ii, j)) WJ = get_local_geometry(space_in, ij, slabidx).WJ - r = RecursiveApply.rmuladd( + r = muladd( ImatT[i, ii], - WJ ⊠ get_node(space_in, arg, ij, slabidx), + WJ * get_node(space_in, arg, ij, slabidx), r, ) end @@ -1693,8 +1632,7 @@ function apply_operator(op::Restrict{(1, 2)}, space_out, slabidx, arg) @inbounds for j in 1:Nq_out, i in 1:Nq_out ij_out = CartesianIndex((i, j)) WJ_out = get_local_geometry(space_out, ij_out, slabidx).WJ - out[slab_index(i, j)] = - RecursiveApply.rdiv(rmatmul2(ImatT, temp, i, j), WJ_out) + out[slab_index(i, j)] = rmatmul2(ImatT, temp, i, j) / WJ_out end return Field(SArray(out), space_out) end @@ -1721,9 +1659,9 @@ function tensor_product!( in_slab = slab(indata, v, h) out_slab = slab(out, v, h) for i in 1:Ni_out - r = M[i, 1] ⊠ in_slab[slab_index(1)] + r = M[i, 1] * in_slab[slab_index(1)] for ii in 2:Ni_in - r = RecursiveApply.rmuladd(M[i, ii], in_slab[slab_index(ii)], r) + r = muladd(M[i, ii], 
in_slab[slab_index(ii)], r) end out_slab[slab_index(i)] = r end @@ -1776,7 +1714,8 @@ function tensor_product!( inout::Data2D{S, Nij}, M::SMatrix{Nij, Nij}, ) where {S, Nij} - tensor_product!(inout, inout, M) + inout_bc = Base.broadcastable(inout) + tensor_product!(inout_bc, inout_bc, M) end """ @@ -1836,14 +1775,14 @@ import .Spaces: slab_type Recursive matrix product along the 1st dimension of `S`. Equivalent to: - mapreduce(⊠, ⊞, W[i,:], S[:,j]) + mapreduce(*, +, W[i,:], S[:,j]) """ function rmatmul1(W, S, i, j) Nq = size(W, 2) - @inbounds r = W[i, 1] ⊠ S[slab_index(1, j)] + @inbounds r = W[i, 1] * S[slab_index(1, j)] @inbounds for ii in 2:Nq - r = RecursiveApply.rmuladd(W[i, ii], S[slab_index(ii, j)], r) + r = muladd(W[i, ii], S[slab_index(ii, j)], r) end return r end @@ -1853,13 +1792,13 @@ end Recursive matrix product along the 2nd dimension `S`. Equivalent to: - mapreduce(⊠, ⊞, W[j,:], S[i, :]) + mapreduce(*, +, W[j,:], S[i, :]) """ function rmatmul2(W, S, i, j) Nq = size(W, 2) - @inbounds r = W[j, 1] ⊠ S[slab_index(i, 1)] + @inbounds r = W[j, 1] * S[slab_index(i, 1)] @inbounds for jj in 2:Nq - r = RecursiveApply.rmuladd(W[j, jj], S[slab_index(i, jj)], r) + r = muladd(W[j, jj], S[slab_index(i, jj)], r) end return r end diff --git a/src/RecursiveApply/RecursiveApply.jl b/src/RecursiveApply/RecursiveApply.jl deleted file mode 100755 index f57b3105da..0000000000 --- a/src/RecursiveApply/RecursiveApply.jl +++ /dev/null @@ -1,206 +0,0 @@ -""" - RecursiveApply - -This module contains operators to recurse over nested `Tuple`s or `NamedTuple`s. - -To extend to another type `T`, define `RecursiveApply.rmap(fn, args::T...)` -""" -module RecursiveApply - -export ⊞, ⊠, ⊟ - -# These functions need to be generated for type stability (since T.parameters is -# a SimpleVector, the compiler cannot always infer its size and elements). 
-@generated first_param(::Type{T}) where {T} = :($(first(T.parameters))) -@generated tail_params(::Type{T}) where {T} = - :($(Tuple{Base.tail((T.parameters...,))...})) - -# Applying `rmaptype` returns `Tuple{...}` for tuple -# types, which cannot follow the recursion pattern as -# it cannot be splatted, so we add a separate method, -# `rmaptype_Tuple`, for the part of the recursion. -rmaptype_Tuple(fn::F, ::Type{Tuple{}}) where {F} = () -rmaptype_Tuple(fn::F, ::Type{T}) where {F, E, T <: Tuple{E}} = - (rmaptype(fn, first_param(T)),) -rmaptype_Tuple(fn::F, ::Type{T}) where {F, T <: Tuple} = - (rmaptype(fn, first_param(T)), rmaptype_Tuple(fn, tail_params(T))...) - -rmaptype_Tuple(_, ::Type{Tuple{}}, ::Type{Tuple{}}) = () -rmaptype_Tuple(_, ::Type{Tuple{}}, ::Type{T}) where {T <: Tuple} = () -rmaptype_Tuple(_, ::Type{T}, ::Type{Tuple{}}) where {T <: Tuple} = () -rmaptype_Tuple( - fn::F, - ::Type{T1}, - ::Type{T2}, -) where {F, T1 <: Tuple, T2 <: Tuple} = ( - rmaptype(fn, first_param(T1), first_param(T2)), - rmaptype_Tuple(fn, tail_params(T1), tail_params(T2))..., -) - -""" - rmap(fn, X...) - -Recursively apply `fn` to each element of `X` -""" -rmap(fn::F, X) where {F} = fn(X) -rmap(fn::F, X::Tuple{}) where {F} = () -rmap(fn::F, X::Tuple) where {F} = - (rmap(fn, first(X)), rmap(fn, Base.tail(X))...) -rmap(fn::F, X::NamedTuple) where {F} = - NamedTuple{nt_names(X)}(rmap(fn, Tuple(X))) - -rmap(fn::F, X, Y) where {F} = fn(X, Y) -rmap(fn::F, X::Tuple{}, Y::Tuple{}) where {F} = () -rmap(fn::F, X::Tuple{}, Y) where {F} = () -rmap(fn::F, X, Y::Tuple{}) where {F} = () -rmap(fn::F, X::Tuple, Y::Tuple) where {F} = - (rmap(fn, first(X), first(Y)), rmap(fn, Base.tail(X), Base.tail(Y))...) -rmap(fn::F, X::Tuple, Y::Tuple{}) where {F} = - (rmap(fn, first(X)), rmap(fn, Base.tail(X))...) - -rmap(fn::F, X::Tuple{}, Y::Tuple) where {F} = - (rmap(fn, first(Y)), rmap(fn, Base.tail(Y))...) -rmap(fn::F, X, Y::Tuple) where {F} = - (rmap(fn, X, first(Y)), rmap(fn, X, Base.tail(Y))...) 
- -rmap(fn::F, X::Tuple, Y) where {F} = - (rmap(fn, first(X), Y), rmap(fn, Base.tail(X), Y)...) - -function rmap(fn::F, X::NamedTuple, Y::NamedTuple) where {F} - @assert nt_names(X) === nt_names(Y) - return NamedTuple{nt_names(X)}(rmap(fn, Tuple(X), Tuple(Y))) -end -rmap(fn::F, X::NamedTuple, Y) where {F} = - NamedTuple{nt_names(X)}(rmap(fn, Tuple(X), Y)) -rmap(fn::F, X::NamedTuple, Y::Tuple) where {F} = - NamedTuple{nt_names(X)}(rmap(fn, Tuple(X), Y)) -rmap(fn::F, X::NamedTuple, Y::Tuple{}) where {F} = - NamedTuple{nt_names(X)}(rmap(fn, Tuple(X))) -rmap(fn::F, X, Y::NamedTuple) where {F} = - NamedTuple{nt_names(Y)}(rmap(fn, X, Tuple(Y))) -rmap(fn::F, X::Tuple, Y::NamedTuple) where {F} = - NamedTuple{nt_names(Y)}(rmap(fn, X, Tuple(Y))) -rmap(fn::F, X::Tuple{}, Y::NamedTuple) where {F} = - NamedTuple{nt_names(Y)}(rmap(fn, Tuple(Y))) - -nt_names(::NamedTuple{names}) where {names} = names - -rmin(X, Y) = rmap(min, X, Y) -rmax(X, Y) = rmap(max, X, Y) - - -""" - rmaptype(fn, T) - rmaptype(fn, T1, T2) - -Recursively apply `fn` to each type parameter of the type `T`, or to each type -parameter of the types `T1` and `T2`, where `fn` returns a type. -""" -rmaptype(fn::F, ::Type{T}) where {F, T} = fn(T) -rmaptype(fn::F, ::Type{T}) where {F, T <: Tuple} = - Tuple{rmaptype_Tuple(fn, T)...} -rmaptype(fn::F, ::Type{T}) where {F, names, Tup, T <: NamedTuple{names, Tup}} = - NamedTuple{names, rmaptype(fn, Tup)} - -rmaptype(fn::F, ::Type{T1}, ::Type{T2}) where {F, T1, T2} = fn(T1, T2) -rmaptype(fn::F, ::Type{T1}, ::Type{T2}) where {F, T1 <: Tuple, T2 <: Tuple} = - Tuple{rmaptype_Tuple(fn, T1, T2)...} -rmaptype( - fn::F, - ::Type{T1}, - ::Type{T2}, -) where { - F, - names, - Tup1, - Tup2, - T1 <: NamedTuple{names, Tup1}, - T2 <: NamedTuple{names, Tup2}, -} = NamedTuple{names, rmaptype(fn, Tup1, Tup2)} - -""" - rpromote_type(Ts...) - -Recursively apply `promote_type` to the input types. -""" -rpromote_type(Ts...) 
= reduce((T1, T2) -> rmaptype(promote_type, T1, T2), Ts) -rpromote_type() = Union{} - -""" - rzero(X) - -Recursively zero out each element of `X`. -""" -rzero(X) = rzero(typeof(X)) -rzero(::Type{T}) where {T} = zero(T) -rzero(::Type{Tuple{}}) = () -rzero(::Type{T}) where {E, T <: Tuple{E}} = (rzero(E),) -rzero(::Type{T}) where {T <: Tuple} = - (rzero(first_param(T)), rzero(tail_params(T))...) -rzero(::Type{Tup}) where {names, T, Tup <: NamedTuple{names, T}} = - NamedTuple{names}(rzero(T)) - -""" - rconvert(T, X) - -Identical to `convert(T, X)`, but with improved type stability for nested types. -""" -rconvert(::Type{T}, X::T) where {T} = X -rconvert(::Type{T}, X) where {T} = - rmap((zero_value, x) -> convert(typeof(zero_value), x), rzero(T), X) -# TODO: Remove this function once Julia's default convert function is -# type-stable for nested Tuple/NamedTuple types. - -""" - rmul(X, Y) - X ⊠ Y - -Recursively scale each element of `X` by `Y`. -""" -rmul(X, Y) = rmap(*, X, Y) -const ⊠ = rmul - -""" - radd(X, Y) - X ⊞ Y - -Recursively add elements of `X` and `Y`. -""" -radd(X) = X -radd(X, Y) = rmap(+, X, Y) -const ⊞ = radd - -# Adapted from Base/operators.jl for general nary operator fallbacks -for op in (:rmul, :radd) - @eval begin - ($op)(a, b, c, xs...) = Base.afoldl($op, ($op)(($op)(a, b), c), xs...) - end -end - -""" - rsub(X, Y) - X ⊟ Y - -Recursively subtract elements of `Y` from `X`. -""" -rsub(X) = rmap(-, X) -rsub(X, Y) = rmap(-, X, Y) -const ⊟ = rsub - -""" - rdiv(X, Y) - -Recursively divide each element of `X` by `Y` -""" -rdiv(X, Y) = rmap(/, X, Y) - -""" - rmuladd(w, X, Y) - -Recursively add elements of `w * X + Y`. 
-""" -rmuladd(w::Number, X, Y) = rmap((x, y) -> muladd(w, x, y), X, Y) -rmuladd(X, w::Number, Y) = rmap((x, y) -> muladd(x, w, y), X, Y) -rmuladd(x::Number, w::Number, Y) = rmap(y -> muladd(x, w, y), Y) - -end # module diff --git a/src/Remapping/Remapping.jl b/src/Remapping/Remapping.jl index 48bab928c8..465e89f418 100644 --- a/src/Remapping/Remapping.jl +++ b/src/Remapping/Remapping.jl @@ -19,8 +19,6 @@ import ..DataLayouts, import ClimaCore.Utilities: half import ClimaCore.Spaces: cuda_synchronize -using ..RecursiveApply - include("remapping_utils.jl") include("interpolate_array.jl") include("distributed_remapping.jl") diff --git a/src/Spaces/dss.jl b/src/Spaces/dss.jl index 2c117eace8..bd204a2b60 100644 --- a/src/Spaces/dss.jl +++ b/src/Spaces/dss.jl @@ -1,7 +1,6 @@ import ..Topologies: DSSBuffer, create_dss_buffer, - assert_same_eltype, dss!, dss_1d!, dss_transform!, @@ -89,7 +88,6 @@ function weighted_dss!( space::Union{AbstractSpectralElementSpace, ExtrudedFiniteDifferenceSpace}, dss_buffer::Union{DSSBuffer, Nothing}, ) - assert_same_eltype(data, dss_buffer) weighted_dss_start!(data, space, dss_buffer) weighted_dss_internal!(data, space, dss_buffer) weighted_dss_ghost!(data, space, dss_buffer) @@ -109,7 +107,6 @@ function weighted_dss_prepare!( }, dss_buffer::DSSBuffer, ) - assert_same_eltype(data, dss_buffer) device = ClimaComms.device(topology(space)) hspace = horizontal_space(space) dss_transform!( @@ -228,12 +225,11 @@ function weighted_dss_internal!( ) Quadratures.requires_dss(quadrature_style(space)) || return nothing sizeof(eltype(data)) > 0 || return nothing - assert_same_eltype(data, dss_buffer) device = ClimaComms.device(topology(hspace)) if hspace isa SpectralElementSpace1D dss_1d!( device, - data, + Base.broadcastable(data), topology(hspace), local_geometry_data(space), dss_weights(space), @@ -313,7 +309,6 @@ function weighted_dss_ghost!( ) Quadratures.requires_dss(quadrature_style(space)) || return data sizeof(eltype(data)) > 0 || return data - 
assert_same_eltype(data, dss_buffer) ClimaComms.finish(dss_buffer.graph_context) device = ClimaComms.device(topology(hspace)) load_from_recv_buffer!(device, dss_buffer) diff --git a/src/Topologies/Topologies.jl b/src/Topologies/Topologies.jl index e99690aa26..1fbb2b7067 100644 --- a/src/Topologies/Topologies.jl +++ b/src/Topologies/Topologies.jl @@ -3,7 +3,7 @@ module Topologies import ClimaComms, Adapt import ..ClimaCore -import ..Utilities: Cache, cart_ind, linear_ind +import ..Utilities: Cache, cart_ind, linear_ind, AutoBroadcaster, nested_broadcast import ..Geometry import ..Domains: Domains, coordinate_type import ..Meshes: Meshes, domain, coordinates diff --git a/src/Topologies/dss.jl b/src/Topologies/dss.jl index 3f0b1c03ba..c637f4e2eb 100644 --- a/src/Topologies/dss.jl +++ b/src/Topologies/dss.jl @@ -66,7 +66,7 @@ create_dss_buffer( local_geometry::Union{DSSTypes2D, Nothing} = nothing, dss_weights::Union{DSSTypes2D, Nothing} = nothing, ) = create_dss_buffer( - data, + Base.broadcastable(data), topology, DataLayouts.VIFH, local_geometry, @@ -168,12 +168,6 @@ end Base.eltype(::DSSBuffer{S}) where {S} = S -assert_same_eltype(::DataLayouts.AbstractData, ::DSSBuffer) = - error("Incorrect buffer eltype") -assert_same_eltype(::DataLayouts.AbstractData{S}, ::DSSBuffer{S}) where {S} = - nothing -assert_same_eltype(::DataLayouts.AbstractData, ::Nothing) = nothing - """ dss_transform!( device::ClimaComms.AbstractDevice, @@ -213,7 +207,7 @@ function dss_transform!( dss_transform!( device, dss_buffer.perimeter_data, - data, + Base.broadcastable(data), perimeter, local_geometry, dss_weights, @@ -223,27 +217,6 @@ function dss_transform!( return nothing end -# `dss_transform` of a `Covariant12Vector` returns a -# `UVWVector`, however, we only need to store a `UVVector` -# in `perimeter_data`. 
Therefore, we drop the vertical dimension: -# via `drop_vert_dim` -""" - drop_vert_dim(::Type{T}, X) - -Convert the type of `X` to type `T` recursively -using `_drop_vert_dim`, which converts from `UVWVector` -to `UVVector` if `T <: UVVector`. -""" -@inline drop_vert_dim(::Type{T}, X) where {T} = - RecursiveApply.rmap(RecursiveApply.rzero(T), X) do zero_value, x - _drop_vert_dim(typeof(zero_value), x) - end -@inline _drop_vert_dim( - ::Type{T}, - x::Geometry.UVWVector, -) where {T <: Geometry.UVVector} = Geometry.UVVector(x.u, x.v) -@inline _drop_vert_dim(::Type{T}, x::T) where {T} = x - """ function dss_transform!( ::ClimaComms.AbstractCPUDevice, @@ -289,9 +262,7 @@ function dss_transform!( local_geometry[loc], dss_weights[loc], ) - perimeter_data[CI(p, 1, 1, level, elem)] = - drop_vert_dim(eltype(perimeter_data), src) - + perimeter_data[CI(p, 1, 1, level, elem)] = src end end end @@ -334,7 +305,7 @@ function dss_untransform!( dss_untransform!( device, perimeter_data, - data, + Base.broadcastable(data), local_geometry, perimeter, localelems, @@ -466,9 +437,9 @@ function dss_local_vertices!( for level in 1:Nv # gather: compute sum over shared vertices sum_data = mapreduce( - ⊞, + +, vertex; - init = RecursiveApply.rzero(eltype(slab(perimeter_data, 1, 1))), + init = zero(eltype(slab(perimeter_data, 1, 1))), ) do (lidx, vert) ip = perimeter_vertex_node_index(vert) perimeter_slab = slab(perimeter_data, level, lidx) @@ -502,7 +473,7 @@ function dss_local_faces!( perimeter_slab2 = slab(perimeter_data, level, lidx2) for (ip1, ip2) in zip(pr1, pr2) val = - perimeter_slab1[slab_index(ip1)] ⊞ + perimeter_slab1[slab_index(ip1)] + perimeter_slab2[slab_index(ip2)] perimeter_slab1[slab_index(ip1)] = val perimeter_slab2[slab_index(ip2)] = val @@ -538,11 +509,9 @@ function dss_local_ghost!( for level in 1:Nv # gather: compute sum over shared vertices sum_data = mapreduce( - ⊞, + +, vertex; - init = RecursiveApply.rzero( - eltype(slab(perimeter_data, 1, 1)), - ), + init = 
zero(eltype(slab(perimeter_data, 1, 1))), ) do (isghost, idx, vert) ip = perimeter_vertex_node_index(vert) if !isghost @@ -550,10 +519,7 @@ function dss_local_ghost!( perimeter_slab = slab(perimeter_data, level, lidx) perimeter_slab[slab_index(ip)] else - RecursiveApply.rmap( - zero, - slab(perimeter_data, 1, 1)[slab_index(1)], - ) + zero(slab(perimeter_data, 1, 1)[slab_index(1)]) end end for (isghost, idx, vert) in vertex @@ -680,7 +646,7 @@ Computed unweighted/pure DSS of `data`. function dss!(data::DSSTypes1D, topology::IntervalTopology) sizeof(eltype(data)) > 0 || return nothing device = ClimaComms.device(topology) - dss_1d!(device, data, topology) + dss_1d!(device, Base.broadcastable(data), topology) return nothing end function dss!(data::DSSTypes2D, topology::Topology2D) @@ -726,7 +692,7 @@ function dss_1d!( left_idx = CartesianIndex(Ni, 1, 1, level, left_face_elem) right_idx = CartesianIndex(1, 1, 1, level, right_face_elem) val = - dss_transform(data, local_geometry, dss_weights, left_idx) ⊞ + dss_transform(data, local_geometry, dss_weights, left_idx) + dss_transform(data, local_geometry, dss_weights, right_idx) data[left_idx] = dss_untransform(T, val, local_geometry, left_idx) data[right_idx] = dss_untransform(T, val, local_geometry, right_idx) diff --git a/src/Topologies/dss_transform.jl b/src/Topologies/dss_transform.jl index c20a219093..1e68e69511 100644 --- a/src/Topologies/dss_transform.jl +++ b/src/Topologies/dss_transform.jl @@ -1,5 +1,4 @@ import ..Topologies: Topology2D -using ..RecursiveApply import UnrolledUtilities: unrolled_map """ @@ -28,63 +27,28 @@ Base.@propagate_inbounds dss_transform( I, ) = arg[I] -@inline function dss_transform( - arg::Tuple{}, - local_geometry::Geometry.LocalGeometry, - weight, -) - () -end -@inline function dss_transform( - args::Tuple, - local_geometry::Geometry.LocalGeometry, - weight, -) - unrolled_map(arg -> dss_transform(arg, local_geometry, weight), args) -end -@inline function dss_transform( - 
arg::NamedTuple{names}, - local_geometry::Geometry.LocalGeometry, - weight, -) where {names} - NamedTuple{names}(dss_transform(Tuple(arg), local_geometry, weight)) -end @inline dss_transform( - arg::Number, + arg, local_geometry::Geometry.LocalGeometry, weight, ) = arg * weight @inline dss_transform( - arg::Geometry.AxisTensor{T, N, <:Tuple{Vararg{Geometry.CartesianAxis}}}, + arg::AutoBroadcaster, local_geometry::Geometry.LocalGeometry, weight, -) where {T, N} = arg * weight +) = nested_broadcast(arg -> dss_transform(arg, local_geometry, weight), arg) + +const NonTransformedAxis = Union{ + Geometry.LocalAxis, + Geometry.CartesianAxis, + Geometry.Covariant3Axis, + Geometry.Contravariant3Axis, +} @inline dss_transform( - arg::Geometry.CartesianVector, + arg::Geometry.AxisVector{<:Any, <:NonTransformedAxis}, local_geometry::Geometry.LocalGeometry, weight, ) = arg * weight -@inline dss_transform( - arg::Geometry.AxisTensor{T, N, <:Tuple{Vararg{Geometry.LocalAxis}}}, - local_geometry::Geometry.LocalGeometry, - weight, -) where {T, N} = arg * weight -@inline dss_transform( - arg::Geometry.AxisTensor{T, N, <:Tuple{}}, - local_geometry::Geometry.LocalGeometry, - weight, -) where {T, N} = arg * weight -@inline dss_transform( - arg::Geometry.LocalVector, - local_geometry::Geometry.LocalGeometry, - weight, -) = arg * weight -@inline dss_transform( - arg::Geometry.Covariant3Vector, - local_geometry::Geometry.LocalGeometry, - weight, -) = arg * weight - @inline function dss_transform( arg::Geometry.AxisVector, local_geometry::Geometry.LocalGeometry, @@ -137,38 +101,17 @@ Base.@propagate_inbounds dss_untransform( ) where {T} = dss_untransform(T, targ, local_geometry[I]) @inline dss_untransform(::Type{T}, targ, local_geometry::Nothing, I) where {T} = dss_untransform(T, targ, local_geometry) -@inline function dss_untransform( - ::Type{NamedTuple{names, T}}, - targ::NamedTuple{names}, - local_geometry, -) where {names, T} - NamedTuple{names}(dss_untransform(T, Tuple(targ), 
local_geometry)) -end + +@inline dss_untransform(::Type{T}, targ::T, local_geometry) where {T} = targ @inline dss_untransform( - ::Type{Tuple{}}, - targ::Tuple{}, - local_geometry::Geometry.LocalGeometry, -) = () -@inline function dss_untransform( ::Type{T}, - targ::Tuple, + targ::AutoBroadcaster, local_geometry::Geometry.LocalGeometry, -) where {T <: Tuple} - ( - dss_untransform( - Base.tuple_type_head(T), - Base.first(targ), - local_geometry, - ), - dss_untransform( - Base.tuple_type_tail(T), - Base.tail(targ), - local_geometry, - )..., - ) -end +) where {T <: AutoBroadcaster} = + nested_broadcast(zero(T), targ) do zero_value, targ + dss_untransform(typeof(zero_value), targ, local_geometry) + end -@inline dss_untransform(::Type{T}, targ::T, local_geometry) where {T} = targ @inline dss_untransform( ::Type{T}, targ::T, diff --git a/src/Utilities/Utilities.jl b/src/Utilities/Utilities.jl index 390c89c98f..4c3e334c0c 100644 --- a/src/Utilities/Utilities.jl +++ b/src/Utilities/Utilities.jl @@ -1,8 +1,12 @@ module Utilities -import UnrolledUtilities: unrolled_map +using UnrolledUtilities + +import ForwardDiff +import InteractiveUtils include("plushalf.jl") +include("auto_broadcaster.jl") include("cache.jl") module Unrolled # TODO: Move all of these functions into UnrolledUtilities.jl @@ -118,18 +122,104 @@ to ensure that recursive functions over nested types have inferrable outputs. 
@inline fieldtype_vals(::Type{T}) where {T} = ntuple(Val ∘ Base.Fix1(fieldtype, T), Val(fieldcount(T))) +# :new may be called with uninitialized fields as of JuliaLang/julia#52169, but +# this leads to segfaults or other compiler errors for immutable DataType fields +@inline can_alloc_uninitialized(::Tuple{Bool, Val{T}}) where {T <: Type} = + throw(ArgumentError("Cannot allocate unspecified $T")) +@inline can_alloc_uninitialized((mutable, _)::Tuple{Bool, Val{Type{T}}}) where {T} = + mutable +@inline can_alloc_uninitialized((mutable, _)::Tuple{Bool, Val{T}}) where {T} = + if T isa Union{Union, UnionAll} + throw(ArgumentError("Cannot allocate value of ambiguous type $T")) + else + mutable_flags = ntuple(Base.Fix1(!isconst, T), Val(fieldcount(T))) + flags_and_type_vals = zip(mutable_flags, fieldtype_vals(T)) + mutable || unrolled_all(can_alloc_uninitialized, flags_and_type_vals) + end + """ new(T, [fields]) -Exposes the `new` pseudo-function that allocates a value of type `T` with the -specified fields. Can also be called without a second argument to leave the -allocated value with uninitialized fields. +Exposes the `new` pseudo-function that allocates a value of type `T`, which can +otherwise only be explicitly called from inner constructors. + +If provided, the second argument is used to initialize fields of the new value +(unlike the lowered pseudo-function, this will not automatically convert to the +`fieldtypes` of `T`). Otherwise, the fields are initialized with arbitrary data, +with special handling of `DataType` fields to avoid errors during compilation. 
+ +# Examples +```jldoctest; setup = :(import ClimaCore.Utilities: new), filter = r"\\d+" +julia> new(Int) +4889520192 + +julia> new(Complex{Int}, (1, 2)) +1 + 2im + +julia> new(@NamedTuple{a::Type{Int}, b::Int, c::Complex{Int}}) +(a = Int64, b = 4889520192, c = 6162822528 + 8036417625im) + +julia> new(@NamedTuple{a::DataType, b::Int, c::Complex{Int}}, (Int, 1, 1 + 2im)) +(a = Int64, b = 1, c = 1 + 2im) +``` +""" +@inline new(::Type{T}) where {T} = maybe_nested_new(Val(T)) +@eval @inline new(::Type{T}, fields) where {T} = $(Expr(:splatnew, :T, :fields)) + +# Wrap each type in a Val to guarantee recursive inlining +@inline maybe_nested_new(::Val{Type{T}}) where {T} = T +@eval @inline maybe_nested_new(val::Val{T}) where {T} = + can_alloc_uninitialized((false, val)) ? $(Expr(:new, :T)) : nested_new(val) + +# A Tuple{Type{T}, ...} turns into a Tuple{DataType, ...} when it is allocated; +# a @NamedTuple{_::Type{T}, ...} also turns into a @NamedTuple{_::DataType, ...} +@inline nested_new(::Val{T}) where {T} = + new(T, unrolled_map(maybe_nested_new, fieldtype_vals(T))) +@inline nested_new(::Val{T}) where {T <: Tuple} = + unrolled_map(maybe_nested_new, fieldtype_vals(T)) +@inline nested_new(::Val{T}) where {names, T <: NamedTuple{names}} = + NamedTuple{names}(unrolled_map(maybe_nested_new, fieldtype_vals(T))) + +""" + unsafe_eltype(itr) + +Analogue of `eltype` with support for un-materialized broadcast expressions, +adapted from `Base.Broadcast.combine_eltypes`. Does not perform any safety +checks, and may potentially return non-concrete types (like an empty `Union{}`). +""" +@inline unsafe_eltype(itr) = eltype(itr) +@inline unsafe_eltype((; f, args)::Base.Broadcast.Broadcasted) = + unrolled_any(has_inferred_error, args) ? 
Union{} : + Core.Compiler.return_type(f, Tuple{unrolled_map(unsafe_eltype, args)...}) + +@inline has_inferred_error(itr) = unsafe_eltype(itr) == Union{} + +struct InferenceError <: Exception + f::Any + args_type::Type{<:Tuple} +end +function Base.showerror(io::IO, (; f, args_type)::InferenceError) + println(io, "Concrete type of result could not be inferred:\n") + InteractiveUtils.code_warntype(io, f, args_type) +end + +""" + safe_eltype(itr) -In contrast to the pseudo-function, this only asserts that all fields match the -`fieldtypes` of `T`, rather than automatically converting them to those types. +Analogue of `eltype` with support for un-materialized broadcast expressions, +adapted from `Base.Broadcast.combine_eltypes`. Throws an error when the concrete +element type of a broadcast expression cannot be inferred, indicating which part +of the expression first encounters a type instability or error during inference. """ -@generated new(::Type{T}) where {T} = Expr(:new, :T) -@generated new(::Type{T}, fields) where {T} = - Expr(:splatnew, :T, :(fields::$(Tuple{fieldtypes(T)...}))) +@inline safe_eltype(itr) = + has_inferred_error(itr) || + !(isconcretetype(unsafe_eltype(itr)) || unsafe_eltype(itr) <: Type) ? + eltype_error(itr) : unsafe_eltype(itr) + +eltype_error(itr) = throw(InferenceError(eltype, Tuple{typeof(itr)})) +eltype_error(bc::Base.Broadcast.Broadcasted) = + has_inferred_error(bc) ? + bc.f(unrolled_map(new ∘ safe_eltype, bc.args)...) 
: # f throws runtime error + throw(InferenceError(bc.f, Tuple{unrolled_map(safe_eltype, bc.args)...})) end # module diff --git a/src/Utilities/auto_broadcaster.jl b/src/Utilities/auto_broadcaster.jl new file mode 100644 index 0000000000..670484067f --- /dev/null +++ b/src/Utilities/auto_broadcaster.jl @@ -0,0 +1,404 @@ +# Default types that can be used as arguments to auto-broadcasted math functions +const DefaultBroadcastable = Union{Tuple, NamedTuple} +const DefaultNonAutoBroadcaster = + Union{DefaultBroadcastable, Number, AbstractArray} + +""" + AutoBroadcaster(itr) + +Wrapper for an iterator that forces certain functions to be broadcasted over the +iterator's elements. This allows different types of broadcasting to be applied +simultaneously; e.g., ClimaCore's `Field`s and similar types use the standard +dot syntax to denote parallelized iteration over spatial locations, and they +wrap their values in `AutoBroadcaster`s for unrolled iteration over subfields. +All statically-sized iterators for which [`is_auto_broadcastable`](@ref) is true +are compatible with `AutoBroadcaster`s. + +In the context of `AutoBroadcaster`s, broadcasting a function applies it with +[`unrolled_map`](https://clima.github.io/UnrolledUtilities.jl/dev/#Package-Features), +iterating over all arguments for which `is_auto_broadcastable` is true +(including those not wrapped in `AutoBroadcaster`s), while other arguments are +passed to the function directly. 
This behavior is triggered by using +`AutoBroadcaster`s, optionally in conjunction with compatible iterators that are +not wrapped in `AutoBroadcaster`s, in the following ways: + - passing them to standard math functions or constructors + - passing them to `ifelse` (for iterating over conditional values) + - applying them as function calls (for iterating over functions) + - explicitly calling [`nested_broadcast`](@ref) + +Nested `AutoBroadcaster`s constructed with [`add_auto_broadcasters`](@ref) +evaluate broadcasts recursively, mapping across every layer of nested iterators +so that broadcasted functions are only applied to non-iterators in the innermost +layers. Aside from automatic broadcasting, `AutoBroadcaster`s are essentially +identical to their underlying iterators, with support for common operations like +`iterate`, `propertynames`, `getindex`, and `reduce`. + +# Examples +```jldoctest; setup = :(import ClimaCore.Utilities, ClimaCore.Geometry.StaticArrays) +julia> x = Utilities.AutoBroadcaster((1, 2.0, StaticArrays.SVector(3, 4))) +(1, 2.0, [3, 4]) + +julia> zero(typeof(x)) +(0, 0.0, [0, 0]) + +julia> 2 * x - (2, 3, [4, 5]) +(0, 1.0, [2, 3]) + +julia> y = Utilities.add_auto_broadcasters((1, 2, (a = 3, b = 4, c = (5, 6, (7, 8))))) +(1, 2, (a = 3, b = 4, c = (5, 6, (7, 8)))) + +julia> min(y, abs(5 - y)) +(1, 2, (a = 2, b = 1, c = (0, 1, (2, 3)))) + +julia> x' * y * x ÷ 5 +(0, 1.0, (a = 15, b = 20, c = (25, 30, (35, 40)))) +``` +""" +struct AutoBroadcaster{I} + itr::I +end + +unwrap(::Type{AutoBroadcaster{I}}) where {I} = I +unwrap(x::AutoBroadcaster) = getfield(x, :itr) # getproperty is overwritten below +unwrap(x) = x + +""" + is_auto_broadcastable(::Type) + is_auto_broadcastable(itr) + +Indicates whether an [`AutoBroadcaster`](@ref) should broadcast over iterators +of the given type. 
By default, this is only true for `Tuple` and `NamedTuple` +types, but it can be extended to any statically-sized type compatible with +[UnrolledUtilities.jl](https://github.com/CliMA/UnrolledUtilities.jl). + +For convenience, `is_auto_broadcastable` also supports passing a concrete +iterator instead of its type, but this method should not be extended directly. +""" +is_auto_broadcastable(::Type{<:DefaultBroadcastable}) = true +is_auto_broadcastable(::Type) = false +is_auto_broadcastable(::Type{Union{}}) = false # to resolve ambiguity +is_auto_broadcastable(itr) = is_auto_broadcastable(typeof(itr)) + +""" + add_auto_broadcasters(itr) + add_auto_broadcasters(::Type) + +Recursively applies the [`AutoBroadcaster`](@ref) constructor to iterators for +which [`is_auto_broadcastable`](@ref) is true, as well as their elements for +which it is true, while leaving values for which it is false unmodified. Can +also be passed an iterator's type to infer the result type for such an iterator. +""" +add_auto_broadcasters(itr) = + itr isa AutoBroadcaster || is_auto_broadcastable(itr) ? + AutoBroadcaster(unrolled_map(add_auto_broadcasters, unwrap(itr))) : itr +add_auto_broadcasters(::Type{T}) where {T} = + Core.Compiler.return_type(add_auto_broadcasters, Tuple{T}) + +""" + drop_auto_broadcasters(itr) + drop_auto_broadcasters(::Type) + +Recursively unwraps constructors applied by [`add_auto_broadcasters`](@ref), +extracting the iterator from every [`AutoBroadcaster`](@ref) in `itr`. Can also +be passed an iterator's type to infer the result type for such an iterator. +""" +drop_auto_broadcasters(itr) = + itr isa AutoBroadcaster || is_auto_broadcastable(itr) ? 
+ unrolled_map(drop_auto_broadcasters, unwrap(itr)) : itr +drop_auto_broadcasters(::Type{T}) where {T} = + Core.Compiler.return_type(drop_auto_broadcasters, Tuple{T}) + +""" + auto_broadcasted([style], f, args, [axes]) + +Analogue of `Base.Broadcast.Broadcasted(style, f, args, axes)` that can pass the +arguments of `f` through either [`add_auto_broadcasters`](@ref) or +[`drop_auto_broadcasters`](@ref) if doing so will help avoid an inferred error. + +When the [`unsafe_eltype`](@ref) of `Broadcasted(style, f, args, axes)` +indicates that `f` will throw an error, a new `Broadcasted` wrapper is +constructed with `add_auto_broadcasters` applied to every argument, and then +another is constructed with `drop_auto_broadcasters` applied to every argument. +If one of the new wrappers no longer corresponds to a guaranteed error, it is +returned instead of the original wrapper. Otherwise, the default result of +`Broadcasted(style, f, args, axes)` is returned without modifications. + +# Examples +```jldoctest; setup = :(import ClimaCore.Utilities), filter = r"\\{.+\\}" +julia> x = (im, (1, 2.0), [3, 4]) +(im, (1, 2.0), [3, 4]) + +julia> y = [x, x, x, x]; + +julia> bc = Base.Broadcast.Broadcasted(*, (Base.Broadcast.Broadcasted(adjoint, (y,)), y)); + +julia> sum(Base.materialize(bc)) +ERROR: MethodError: no method matching adjoint(::Tuple{...}) +[...] + +julia> bc = Utilities.auto_broadcasted(*, (Utilities.auto_broadcasted(adjoint, (y,)), y)); + +julia> sum(Base.materialize(bc)) +(4 + 0im, (4, 16.0), 100) +``` +""" +auto_broadcasted(f::F, args, axes...) where {F} = + auto_broadcasted(Base.Broadcast.combine_styles(args...), f, args, axes...) +function auto_broadcasted(style::Base.BroadcastStyle, f::F, args, axes...) where {F} + wrapped_f(args...) = f(unrolled_map(add_auto_broadcasters, args)...) + unwrapped_f(args...) = f(unrolled_map(drop_auto_broadcasters, args)...) + bc = Base.Broadcast.Broadcasted(style, f, args, axes...) 
+ unsafe_eltype(bc) != Union{} && return bc + bc′ = Base.Broadcast.Broadcasted(style, wrapped_f, args, axes...) + unsafe_eltype(bc′) != Union{} && return bc′ + bc′′ = Base.Broadcast.Broadcasted(style, unwrapped_f, args, axes...) + unsafe_eltype(bc′′) != Union{} && return bc′′ + return bc # error in bc is not caused by missing or extra AutoBroadcasters +end + +""" + nested_broadcast(f, args...) + +Analogue of `broadcast` that is applied recursively over nested iterators, as +long as at least one argument is an [`AutoBroadcaster`](@ref). All loops over +iterator elements are unrolled and inlined to optimize performance. + +This function is automatically called when an [`AutoBroadcaster`](@ref) is +passed to any standard math function or constructor, but for generic operations +it must be called explicitly. + +# Examples +```jldoctest; setup = :(import ClimaCore.Utilities) +julia> x = Utilities.add_auto_broadcasters(((:a, :b, :c), (:d, :e, :f), :g)) +((:a, :b, :c), (:d, :e, :f), :g) + +julia> Utilities.nested_broadcast(string, x) +(("a", "b", "c"), ("d", "e", "f"), "g") + +julia> y = Utilities.add_auto_broadcasters((1, 11, (111, 1111, 11111))) +(1, 11, (111, 1111, 11111)) + +julia> Utilities.nested_broadcast(Symbol, x, y * y) +((:a1, :b1, :c1), (:d121, :e121, :f121), (:g12321, :g1234321, :g123454321)) +``` +""" +nested_broadcast(f::F, args...) where {F} = _nested_broadcast(f, args) + +# Zip the arguments instead of splatting them to guarantee recursive inlining +function _nested_broadcast(f::F, args) where {F} + unrolled_any(Base.Fix2(isa, AutoBroadcaster), args) || return f(args...) 
+ unwrapped_args = unrolled_map(unwrap, args) + broadcastable_args = unrolled_filter(is_auto_broadcastable, unwrapped_args) + lengths = unrolled_map(length, broadcastable_args) + if !unrolled_allequal(lengths) + lengths_str = join(unique(lengths), ", ", " and ") + throw(DimensionMismatch("Arguments have unequal lengths $lengths_str")) + end + broadcast_axis = StaticOneTo(first(lengths)) + uniform_length_args = unrolled_map(unwrapped_args) do x + is_auto_broadcastable(x) ? x : Iterators.map(Returns(x), broadcast_axis) + end + zipped_args = unrolled_map(tuple, uniform_length_args...) + result_itr = unrolled_map(Base.Fix1(_nested_broadcast, f), zipped_args) + return AutoBroadcaster(result_itr) +end + +# Wrap each Type in a struct to guarantee recursive inlining +nested_broadcast(::Type{T}, args...) where {T} = + nested_broadcast(Base.Fix1((T, args) -> T(args...), T) ∘ tuple, args...) + +# Nested version of f.(typeof.(x), typeof.(y), ...) for x::type1, y::type2, etc. +nested_broadcast_over_types(f::F, types...) where {F} = nested_broadcast( + (args...) -> f(unrolled_map(typeof, args)...), + unrolled_map(new, types)..., +) + +# Nested version of typeof(new.(f.(typeof.(x), typeof.(y), ...))) for x::type1... +nested_broadcast_result_type(f::F, types...) where {F} = + typeof(nested_broadcast_over_types((types...) 
-> new(f(types...)), types...)) + +######################################### +## Automatic Unwrapping and Rewrapping ## +######################################### + +Base.eltype(::Type{X}) where {X <: AutoBroadcaster} = eltype(unwrap(X)) + +Base.Tuple(x::AutoBroadcaster) = Tuple(unwrap(x)) +Base.NamedTuple{names}(x::AutoBroadcaster) where {names} = + NamedTuple{names}(unwrap(x)) + +Base.propertynames(x::AutoBroadcaster) = propertynames(unwrap(x)) +Base.getproperty(x::AutoBroadcaster, name::Symbol) = getproperty(unwrap(x), name) + +for f in (:keys, :values, :pairs, :isempty, :length, :firstindex, :lastindex) + @eval Base.$f(x::AutoBroadcaster) = $f(unwrap(x)) +end +Base.show(io::IO, x::AutoBroadcaster) = show(io, unwrap(x)) +Base.axes(x::AutoBroadcaster, dim...) = axes(unwrap(x), dim...) +Base.size(x::AutoBroadcaster, dim...) = size(unwrap(x), dim...) +Base.iterate(x::AutoBroadcaster, state...) = iterate(unwrap(x), state...) +Base.merge(args::AutoBroadcaster...) = + AutoBroadcaster(merge(unrolled_map(unwrap, args)...)) +Base.@propagate_inbounds Base.getindex(x::AutoBroadcaster, index) = + getindex(unwrap(x), index) +Base.@propagate_inbounds Base.setindex(x::AutoBroadcaster, value, index) = + AutoBroadcaster(Base.setindex(unwrap(x), value, index)) + +# Broadcasts/maps/reductions are not recursive, unlike the math operations below +Base.broadcastable(x::AutoBroadcaster) = Base.broadcastable(unwrap(x)) +Base.map(f::F, arg::AutoBroadcaster, args::AutoBroadcaster...) where {F} = + AutoBroadcaster(map(f, unwrap(arg), unrolled_map(unwrap, args)...)) +Base.mapreduce( + f::F, + op::O, + arg::AutoBroadcaster, + args::AutoBroadcaster...; + init..., +) where {F, O} = + mapreduce(f, op, unwrap(arg), unrolled_map(unwrap, args)...; init...) 
+ +# Circumvent the built-in convert function, which can introduce type +# instabilities for nested Tuples and NamedTuples on Julia 1.10 +Base.convert(::Type{X}, x::X) where {X <: AutoBroadcaster} = x +Base.convert(::Type{I}, x::AutoBroadcaster) where {I <: DefaultBroadcastable} = + nested_convert(I, x) +Base.convert(::Type{X}, itr) where {X <: AutoBroadcaster} = + nested_convert(X, itr) +nested_convert(::Type{T}, arg) where {T} = _nested_convert((new(T), arg)) + +# Turn types into values and zip the arguments to guarantee recursive inlining +_nested_convert((x, y)) = + x isa AutoBroadcaster ? AutoBroadcaster(_nested_convert((unwrap(x), y))) : + is_auto_broadcastable(x) ? + unrolled_map(_nested_convert, unrolled_map(tuple, x, unwrap(y))) : + convert(typeof(x), unwrap(y)) + +############################################### +## Automatic Broadcasting of Math Operations ## +############################################### + +const AutoBroadcasterOrSimilar = Union{AutoBroadcaster, DefaultBroadcastable} + +# Type functions extended in ForwardDiff.jl +for f in (:zero, :one, :eps, :float) + @eval Base.$f(::Type{X}) where {X <: AutoBroadcaster} = + nested_broadcast_over_types($f, X) +end +Base.precision(::Type{X}; base...) where {X <: AutoBroadcaster} = + nested_broadcast_over_types(x -> precision(x; base...), X) +Base.promote_rule( + ::Type{X}, + ::Type{Y}, +) where {X <: AutoBroadcaster, Y <: AutoBroadcasterOrSimilar} = + nested_broadcast_result_type(Base.promote_type, X, Y) + +# Common type functions absent from ForwardDiff.jl +for f in (:big, :real, :complex, :widen) + @eval Base.$f(::Type{X}) where {X <: AutoBroadcaster} = + nested_broadcast_result_type($f, X) +end + +# Types of constructors for subtypes of T that have an unconstrained argument, +# leading to ambiguities with the method (::Type{<:T})(::AutoBroadcaster) = ... 
+function ambiguous_constructor_types(T) + types = [] + if isabstracttype(T) + for T_subtype in InteractiveUtils.subtypes(T) + append!(types, ambiguous_constructor_types(T_subtype)) + end + end + vars = [] + empty_var = TypeVar(Symbol()) + while true + new_type = reduce((T, var) -> UnionAll(var, T), vars; init = Type{T}) + constructor = reduce((T, _) -> UnionAll(empty_var, T), vars; init = T) + hasmethod(constructor, Tuple{AutoBroadcaster}) && push!(types, new_type) + T isa DataType && break + push!(vars, T.var) + T = T.body + end + return types +end + +# All Number constructors (only defined for Integer and Dual in ForwardDiff.jl), +# with constructors for a few subtypes defined separately to avoid ambiguities +for constructor_type in ambiguous_constructor_types(Number) + @eval (T::$constructor_type)(x::AutoBroadcaster) = nested_broadcast(T, x) +end +(T::Type{<:Number})(x::AutoBroadcaster) = nested_broadcast(T, x) + +# Permutations of n type constraints that include at least one :AutoBroadcaster +function constraint_permutations(n) + all_constraint_names = (:AutoBroadcaster, :DefaultNonAutoBroadcaster) + permutations = Iterators.product(map(Returns(all_constraint_names), 1:n)...) 
+ return Iterators.filter(Base.Fix1(any, ==(:AutoBroadcaster)), permutations) +end + +# Boolean functions extended in ForwardDiff.jl +for f in ForwardDiff.UNARY_PREDICATES + @eval Base.$f(x::AutoBroadcaster) = nested_broadcast($f, x) +end +for f in (:<, :<=, :(==), :isless), (X, Y) in constraint_permutations(2) + @eval Base.$f(x::$X, y::$Y) = nested_broadcast($f, x, y) +end # FIXME: Adding a method for isequal here causes invalidations + +# Continuously differentiable functions from Base extended in ForwardDiff.jl +const base_function_diff_rules = + Iterators.filter(==(:Base) ∘ first, ForwardDiff.DiffRules.diffrules()) +for (_, f, n) in base_function_diff_rules, types in constraint_permutations(n) + args = map(Base.Fix1(Symbol, :arg), 1:n) + typed_args = map(((arg, type),) -> :($arg::$type), zip(args, types)) + @eval Base.$f($(typed_args...)) = nested_broadcast(Base.$f, $(args...)) +end + +# Other math functions from Base extended in ForwardDiff.jl, excluding those +# that return pairs of values (e.g., sincos or sincospi), so we avoid having to +# distinguish a Tuple of 2 AutoBroadcasters from an AutoBroadcaster of 2 Tuples +for f in (:zero, :one, :eps, :float, :nextfloat, :prevfloat, :exponent) + @eval Base.$f(x::AutoBroadcaster) = nested_broadcast($f, x) +end +for f in (:floor, :ceil, :trunc, :round) + @eval Base.$f(x::AutoBroadcaster) = nested_broadcast($f, x) + @eval Base.$f(::Type{T}, x::AutoBroadcaster) where {T} = + nested_broadcast(Base.Fix1($f, T), x) +end +Base.precision(x::AutoBroadcaster; base...) 
= + nested_broadcast(x -> precision(x; base...), x) +Base.literal_pow(::typeof(^), x::AutoBroadcaster, p::Val) = + nested_broadcast(x -> Base.literal_pow(^, x, p), x) +for (X, Y) in constraint_permutations(2) + @eval Base.div(x::$X, y::$Y, r::RoundingMode) = + nested_broadcast((x, y) -> div(x, y, r), x, y) + @eval Base.fld(x::$X, y::$Y) = nested_broadcast(fld, x, y) + @eval Base.cld(x::$X, y::$Y) = nested_broadcast(cld, x, y) +end +for (X, Y, Z) in constraint_permutations(3) + @eval Base.fma(x::$X, y::$Y, z::$Z) = nested_broadcast(fma, x, y, z) + @eval Base.muladd(x::$X, y::$Y, z::$Z) = + nested_broadcast(muladd, x, y, z) +end + +# Common math functions absent from ForwardDiff.jl, excluding those that return +# pairs of values (e.g., minmax, divrem, or fldmod), so we avoid having to +# distinguish a Tuple of 2 AutoBroadcasters from an AutoBroadcaster of 2 tuples +for f in (:!, :~, :adjoint, :angle, :cis, :cispi, :conj, :sign) + @eval Base.$f(x::AutoBroadcaster) = nested_broadcast($f, x) +end +for f in (://, :&, :|, :xor, :fld1, :mod1), (X, Y) in constraint_permutations(2) + @eval Base.$f(x::$X, y::$Y) = nested_broadcast($f, x, y) +end + +# Internal functions called by Base.sum and Base.prod +for f in (:add_sum, :mul_prod), (X, Y) in constraint_permutations(2) + @eval Base.$f(x::$X, y::$Y) = nested_broadcast(Base.$f, x, y) +end + +# Using AutoBroadcasters/DefaultBroadcastables as if-else statement conditionals +for (X, Y) in constraint_permutations(2) + @eval Base.ifelse(cond::AutoBroadcasterOrSimilar, x::$X, y::$Y) = + nested_broadcast(ifelse, cond, x, y) +end + +# Applying AutoBroadcasters like functions +(f::AutoBroadcaster)(args...) = + nested_broadcast((f, args...) -> f(args...), f, args...) 
diff --git a/src/recursive_apply.jl b/src/recursive_apply.jl new file mode 100644 index 0000000000..eda6fc75a8 --- /dev/null +++ b/src/recursive_apply.jl @@ -0,0 +1,27 @@ +# This module is for backwards compatibility with previous versions of ClimaCore +module RecursiveApply + +using ..Utilities: add_auto_broadcasters, drop_auto_broadcasters + +struct WithAutoBroadcasters{F} + f::F +end + +# Call f with all arguments wrapped in AutoBroadcasters, then unwrap the result +((; f)::WithAutoBroadcasters)(x) = + drop_auto_broadcasters(f(add_auto_broadcasters(x))) +((; f)::WithAutoBroadcasters)(x, y) = + drop_auto_broadcasters(f(add_auto_broadcasters(x), add_auto_broadcasters(y))) + +for (f, rf) in ((:+, :radd), (:-, :rsub), (:*, :rmul), (:/, :rdiv)) + @eval const $rf = WithAutoBroadcasters($f) +end +for f in (:zero, :min, :max, :promote_type) + @eval const $(Symbol(:r, f)) = WithAutoBroadcasters($f) +end + +const ⊞ = radd +const ⊟ = rsub +const ⊠ = rmul + +end diff --git a/test/Geometry/axistensors.jl b/test/Geometry/axistensors.jl index 85898b9e33..ffa50de776 100644 --- a/test/Geometry/axistensors.jl +++ b/test/Geometry/axistensors.jl @@ -43,6 +43,7 @@ import ClimaCore @test -x + x * 2 - x / 2 == -x + 2 * x - 2 \ x == x / 2 @test -x' + x' * 2 - x' / 2 == -x' + 2 * x' - 2 \ x' == (x / 2)' + @test x * 3 == x ⊗ 3 == Geometry.Covariant12Vector(3.0, 6.0) @test x * y' == x ⊗ y == Geometry.AxisTensor( @@ -53,16 +54,6 @@ import ClimaCore @test Geometry.components(M * inv(M)) == @SMatrix [1.0 0.0; 0.0 1.0] @test Geometry.components(inv(M) * M) == @SMatrix [1.0 0.0; 0.0 1.0] - @test x ⊗ 3 == Geometry.Covariant12Vector(3.0, 6.0) - @test x ⊗ (1, (a = 2, b = 3)) == ( - Geometry.Covariant12Vector(1.0, 2.0), - ( - a = Geometry.Covariant12Vector(2.0, 4.0), - b = Geometry.Covariant12Vector(3.0, 6.0), - ), - ) - - @test Geometry.components(M * inv(M)) == @SMatrix [1.0 0.0; 0.0 1.0] @test Geometry.components(inv(M) * M) == @SMatrix [1.0 0.0; 0.0 1.0] diff --git 
a/test/Geometry/rmul_with_projection.jl b/test/Geometry/mul_with_projection.jl similarity index 58% rename from test/Geometry/rmul_with_projection.jl rename to test/Geometry/mul_with_projection.jl index c996e19d7e..29e91114d8 100644 --- a/test/Geometry/rmul_with_projection.jl +++ b/test/Geometry/mul_with_projection.jl @@ -1,6 +1,6 @@ #= julia --project=.buildkite -using Revise; include(joinpath("test", "Geometry", "rmul_with_projection.jl")) +using Revise; include(joinpath("test", "Geometry", "mul_with_projection.jl")) =# using Test using JET @@ -8,15 +8,16 @@ import Random using StaticArrays: @SMatrix import ClimaCore.Geometry -import ClimaCore.Geometry: rmul_with_projection, rmul_return_type +import ClimaCore.Geometry: mul_with_projection, mul_return_type +import ClimaCore.Utilities: add_auto_broadcasters nested_type(value) = nested_type(value, value, value) nested_type(value1, value2, value3) = (; a = (), b = value1, c = (value2, (; d = (value3,)), (;))) -function test_rmul_with_projection(x::X, y::Y, lg, expected_result) where {X, Y} - result = rmul_with_projection(x, y, lg) - result_type = rmul_return_type(X, Y) +function test_mul_with_projection(x::X, y::Y, lg, expected_result) where {X, Y} + result = mul_with_projection(x, y, lg) + result_type = mul_return_type(X, Y) # Compute the maximum error as an integer multiple of machine epsilon. 
FT = Geometry.undertype(typeof(lg)) @@ -29,15 +30,15 @@ function test_rmul_with_projection(x::X, y::Y, lg, expected_result) where {X, Y} ) @test max_error <= 1 # correctness - @test (@allocated rmul_with_projection(x, y, lg)) == 0 # allocations - @test_opt rmul_with_projection(x, y, lg) # type instabilities + @test (@allocated mul_with_projection(x, y, lg)) == 0 # allocations + @test_opt mul_with_projection(x, y, lg) # type instabilities @test result_type == typeof(result) # correctness - @test (@allocated rmul_return_type(X, Y)) == 0 # allocations - @test_opt rmul_return_type(X, Y) # type instabilities + @test (@allocated mul_return_type(X, Y)) == 0 # allocations + @test_opt mul_return_type(X, Y) # type instabilities end -@testset "rmul_with_projection Unit Tests" begin +@testset "mul_with_projection Unit Tests" begin Random.seed!(1) # ensures reproducibility FT = Float64 @@ -60,65 +61,65 @@ end projected_tensor = Geometry.project(dual_axis, tensor, lg) # Test all valid combinations of single values. 
- test_rmul_with_projection(number, number, lg, number * number) - test_rmul_with_projection(number, vector, lg, number * vector) - test_rmul_with_projection(number, tensor, lg, number * tensor) - test_rmul_with_projection(number, covector, lg, number * covector) - test_rmul_with_projection(number, cotensor, lg, number * cotensor) - test_rmul_with_projection(vector, number, lg, vector * number) - test_rmul_with_projection(vector, covector, lg, vector * covector) - test_rmul_with_projection(tensor, number, lg, tensor * number) - test_rmul_with_projection(tensor, vector, lg, tensor * projected_vector) - test_rmul_with_projection(tensor, tensor, lg, tensor * projected_tensor) - test_rmul_with_projection(tensor, cotensor, lg, tensor * cotensor) - test_rmul_with_projection(covector, number, lg, covector * number) - test_rmul_with_projection(covector, vector, lg, covector * projected_vector) - test_rmul_with_projection(covector, tensor, lg, covector * projected_tensor) - test_rmul_with_projection(covector, cotensor, lg, covector * cotensor) - test_rmul_with_projection(cotensor, number, lg, cotensor * number) - test_rmul_with_projection(cotensor, vector, lg, cotensor * projected_vector) - test_rmul_with_projection(cotensor, tensor, lg, cotensor * projected_tensor) - test_rmul_with_projection(cotensor, cotensor, lg, cotensor * cotensor) + test_mul_with_projection(number, number, lg, number * number) + test_mul_with_projection(number, vector, lg, number * vector) + test_mul_with_projection(number, tensor, lg, number * tensor) + test_mul_with_projection(number, covector, lg, number * covector) + test_mul_with_projection(number, cotensor, lg, number * cotensor) + test_mul_with_projection(vector, number, lg, vector * number) + test_mul_with_projection(vector, covector, lg, vector * covector) + test_mul_with_projection(tensor, number, lg, tensor * number) + test_mul_with_projection(tensor, vector, lg, tensor * projected_vector) + test_mul_with_projection(tensor, tensor, lg, 
tensor * projected_tensor) + test_mul_with_projection(tensor, cotensor, lg, tensor * cotensor) + test_mul_with_projection(covector, number, lg, covector * number) + test_mul_with_projection(covector, vector, lg, covector * projected_vector) + test_mul_with_projection(covector, tensor, lg, covector * projected_tensor) + test_mul_with_projection(covector, cotensor, lg, covector * cotensor) + test_mul_with_projection(cotensor, number, lg, cotensor * number) + test_mul_with_projection(cotensor, vector, lg, cotensor * projected_vector) + test_mul_with_projection(cotensor, tensor, lg, cotensor * projected_tensor) + test_mul_with_projection(cotensor, cotensor, lg, cotensor * cotensor) # Test some combinations of complicated nested values. - T = nested_type - test_rmul_with_projection( + T = add_auto_broadcasters ∘ nested_type + test_mul_with_projection( number, T(covector, vector, tensor), lg, T(number * covector, number * vector, number * tensor), ) - test_rmul_with_projection( + test_mul_with_projection( T(covector, vector, tensor), number, lg, T(covector * number, vector * number, tensor * number), ) - test_rmul_with_projection( + test_mul_with_projection( vector, T(number, number, number), lg, T(vector * number, vector * number, vector * number), ) - test_rmul_with_projection( + test_mul_with_projection( T(number, number, number), covector, lg, T(number * covector, number * covector, number * covector), ) - test_rmul_with_projection( + test_mul_with_projection( T(number, vector, number), T(covector, number, tensor), lg, T(number * covector, vector * number, number * tensor), ) - test_rmul_with_projection( + test_mul_with_projection( T(covector, number, tensor), T(number, vector, number), lg, T(covector * number, number * vector, tensor * number), ) - test_rmul_with_projection( + test_mul_with_projection( covector, T(vector, number, tensor), lg, @@ -128,7 +129,7 @@ end covector * projected_tensor, ), ) - test_rmul_with_projection( + test_mul_with_projection( 
T(covector, number, covector), vector, lg, @@ -138,7 +139,7 @@ end covector * projected_vector, ), ) - test_rmul_with_projection( + test_mul_with_projection( T(covector, number, covector), T(number, vector, tensor), lg, diff --git a/test/Limiters/distributed/dlimiter.jl b/test/Limiters/distributed/dlimiter.jl index 8eca406f45..9e1f733d99 100644 --- a/test/Limiters/distributed/dlimiter.jl +++ b/test/Limiters/distributed/dlimiter.jl @@ -8,7 +8,6 @@ using ClimaCore: Spaces, Limiters, Quadratures -using ClimaCore.RecursiveApply using ClimaCore: slab using Test @@ -67,12 +66,12 @@ q = map( coord -> (x = 1.2 * coord.x, y = 1.5 * coord.y), Fields.coordinate_field(hv_center_space), ) -ρq = ρ .⊠ q +ρq = ρ .* q q_ref = map( coord -> (x = coord.x, y = coord.y), Fields.coordinate_field(hv_center_space), ) -ρq_ref = ρ .⊠ q_ref +ρq_ref = ρ .* q_ref total_ρq = sum(ρq) @@ -80,7 +79,7 @@ limiter = Limiters.QuasiMonotoneLimiter(ρq) Limiters.compute_bounds!(limiter, ρq_ref, ρ) Limiters.apply_limiter!(ρq, ρ, limiter) -q = RecursiveApply.rdiv.(ρq, ρ) +q = ρq ./ ρ @test sum(ρq.x) ≈ total_ρq.x @test sum(ρq.y) ≈ total_ρq.y diff --git a/test/Limiters/limiter.jl b/test/Limiters/limiter.jl index 7e03d28c07..a0b783ca4c 100644 --- a/test/Limiters/limiter.jl +++ b/test/Limiters/limiter.jl @@ -14,7 +14,6 @@ using ClimaCore: Spaces, Limiters, Quadratures -using ClimaCore.RecursiveApply import ClimaCore.DataLayouts: slab_index using ClimaCore: slab using Test @@ -167,7 +166,7 @@ end FT[i + f for i in 1:5, j in 1:5, f in 1:2], ) ρ = DataLayouts.IJF{FT, 5}(FT[j / 2 for i in 1:5, j in 1:5, f in 1:1]) - ρq = ρ .⊠ q + ρq = ρ .* q WJ = DataLayouts.IJF{FT, 5}(ones(FT, 5, 5, 1)) q_min = (FT(3.2), FT(3.0)) q_max = (FT(5.2), FT(5.0)) @@ -179,7 +178,7 @@ end ρq_new = deepcopy(ρq) Limiters.apply_limit_slab!(ρq_new, ρ, WJ, q_bounds, eps(FT)) - q_new = RecursiveApply.rdiv.(ρq_new, ρ) + q_new = ρq_new ./ ρ for j in 1:5, i in 1:5 @test q_min[1] <= q_new[si(i, j)][1] <= q_max[1] @test q_min[2] <= q_new[si(i, 
j)][2] <= q_max[2] @@ -245,12 +244,12 @@ end q₀(coords, x_scale, y_scale) = (x = x_scale * coords.x, y = y_scale * coords.y) q = @. q₀(coords, x_scale, y_scale) - ρq = ρ .⊠ q + ρq = ρ .* q q_ref = map( coord -> (x = coord.x, y = coord.y), Fields.coordinate_field(space), ) - ρq_ref = ρ .⊠ q_ref + ρq_ref = ρ .* q_ref total_ρq = (; x = sum(ρq.x), y = sum(ρq.y)) @@ -258,7 +257,7 @@ end Limiters.compute_bounds!(limiter, ρq_ref, ρ) Limiters.apply_limiter!(ρq, ρ, limiter) - q = RecursiveApply.rdiv.(ρq, ρ) + q = ρq ./ ρ @test sum(ρq.x) ≈ total_ρq.x @test sum(ρq.y) ≈ total_ρq.y @@ -300,12 +299,12 @@ end q₀(coords, x_scale, y_scale) = (x = x_scale * coords.x, y = y_scale * coords.y) q = @. q₀(coords, x_scale, y_scale) - ρq = ρ .⊠ q + ρq = ρ .* q q_ref = map( coord -> (x = coord.x, y = coord.y), Fields.coordinate_field(hv_center_space), ) - ρq_ref = ρ .⊠ q_ref + ρq_ref = ρ .* q_ref total_ρq = (; x = sum(ρq.x), y = sum(ρq.y)) @@ -313,7 +312,7 @@ end Limiters.compute_bounds!(limiter, ρq_ref, ρ) Limiters.apply_limiter!(ρq, ρ, limiter) - q = RecursiveApply.rdiv.(ρq, ρ) + q = ρq ./ ρ @test sum(ρq.x) ≈ total_ρq.x @test sum(ρq.y) ≈ total_ρq.y diff --git a/test/Limiters/vertical_mass_borrowing_limiter.jl b/test/Limiters/vertical_mass_borrowing_limiter.jl index e6ea17ffd9..30f451bc68 100644 --- a/test/Limiters/vertical_mass_borrowing_limiter.jl +++ b/test/Limiters/vertical_mass_borrowing_limiter.jl @@ -5,7 +5,6 @@ using Revise; include(joinpath("test", "Limiters", "vertical_mass_borrowing_limi using ClimaComms ClimaComms.@import_required_backends using ClimaCore: Fields, Spaces, Limiters -using ClimaCore.RecursiveApply using ClimaCore.Geometry using ClimaCore.Grids using ClimaCore.CommonGrids @@ -63,7 +62,7 @@ end (; z) = coords perturb_field!(q; perturb_radius = perturb_q) perturb_field!(ρ; perturb_radius = perturb_ρ) - ρq_init = ρ .⊠ q + ρq_init = ρ .* q sum_ρq_init = sum(ρq_init) # Test that the minimum is below 0 @@ -74,7 +73,7 @@ end limiter = 
Limiters.VerticalMassBorrowingLimiter((0.0,)) Limiters.apply_limiter!(q, ρ, limiter) @test 0 ≤ minimum(q) - ρq = ρ .⊠ q + ρq = ρ .* q @test isapprox(sum(ρq), sum_ρq_init; atol = 1e-15) @test isapprox(sum(ρq), sum_ρq_init; rtol = 1e-10) plot_results(ClimaCore.to_cpu(ρq), ClimaCore.to_cpu(ρq_init)) @@ -113,7 +112,7 @@ end perturb_field!(scalar_field; perturb_radius = perturb_q) q.b .= scalar_field perturb_field!(ρ; perturb_radius = perturb_ρ) - ρq_init = ρ .⊠ q + ρq_init = ρ .* q sum_ρq_init = sum(ρq_init) # Test that the minimum is below 0 @@ -124,7 +123,7 @@ end limiter = Limiters.VerticalMassBorrowingLimiter((0.0, 0.0)) Limiters.apply_limiter!(q, ρ, limiter) @test 0 ≤ minimum(parent(q)) - ρq = ρ .⊠ q + ρq = ρ .* q @test isapprox(sum(ρq.a), sum_ρq_init.a; atol = 0.07) @test isapprox(sum(ρq.a), sum_ρq_init.a; rtol = 0.07) @test isapprox(sum(ρq.b), sum_ρq_init.b; atol = 0.07) @@ -161,7 +160,7 @@ end perturb_field!(q; perturb_radius = perturb_q) perturb_field!(ρ; perturb_radius = perturb_ρ) - ρq_init = ρ .⊠ q + ρq_init = ρ .* q sum_ρq_init = sum(ρq_init) # Test that the minimum is below 0 @@ -172,7 +171,7 @@ end limiter = Limiters.VerticalMassBorrowingLimiter((0.0,)) Limiters.apply_limiter!(q, ρ, limiter) @test 0 ≤ minimum(q) - ρq = ρ .⊠ q + ρq = ρ .* q @test isapprox(sum(ρq), sum_ρq_init; atol = 0.1) @test isapprox(sum(ρq), sum_ρq_init; rtol = 0.001) end diff --git a/test/MatrixFields/band_matrix_row.jl b/test/MatrixFields/band_matrix_row.jl index ac5a55bd1c..affe69667b 100644 --- a/test/MatrixFields/band_matrix_row.jl +++ b/test/MatrixFields/band_matrix_row.jl @@ -1,4 +1,5 @@ using LinearAlgebra: I +using ClimaCore.Utilities: add_auto_broadcasters include("matrix_field_test_utils.jl") @@ -29,7 +30,7 @@ include("matrix_field_test_utils.jl") TridiagonalMatrixRow(1, 0, 1) / 2 - I == zero(PentadiagonalMatrixRow{Int}) - NT = nested_type + NT = add_auto_broadcasters ∘ nested_type @test_all QuaddiagonalMatrixRow(NT(0.5), NT(1), NT(1), NT(1 // 2)) + 
BidiagonalMatrixRow(NT(-0.5), NT(-1 // 2)) == QuaddiagonalMatrixRow(NT(1), NT(1), NT(1), NT(1)) / 2 diff --git a/test/MatrixFields/field_matrix_solvers.jl b/test/MatrixFields/field_matrix_solvers.jl index 4f65861fd5..6b3fd987b9 100644 --- a/test/MatrixFields/field_matrix_solvers.jl +++ b/test/MatrixFields/field_matrix_solvers.jl @@ -7,7 +7,6 @@ import Logging: Debug import LinearAlgebra: I, norm, ldiv!, mul! import ClimaComms import ClimaCore.Utilities: half -import ClimaCore.RecursiveApply: ⊠ import ClimaCore.MatrixFields: @name import ClimaCore: Spaces, MatrixFields, Fields, Domains, Meshes, Topologies, Geometry @@ -379,9 +378,7 @@ end ᶠᶜmat2_u₃_scalar = ᶠᶜmat2 .* (e³,) ᶜᶠmat2_scalar_u₃ = ᶜᶠmat2 .* (e₃',) ᶠᶠmat3_u₃_u₃ = ᶠᶠmat3 .* (e³ * e₃',) - ᶜᶠmat2_ρχ_u₃ = map(Base.Fix1(map, Base.Fix2(⊠, ρχ_unit ⊠ e₃')), ᶜᶠmat2) - # We need to use Fix1 and Fix2 instead of defining anonymous functions in - # order for the result of map to be inferrable. + ᶜᶠmat2_ρχ_u₃ = ᶜᶠmat2 .* (ρχ_unit,) .* (e₃',) b_dry_dycore = Fields.FieldVector(; c = ᶜvec .* (dry_center_gs_unit,), diff --git a/test/MatrixFields/field_names.jl b/test/MatrixFields/field_names.jl index 3d0849ec5c..401680f589 100644 --- a/test/MatrixFields/field_names.jl +++ b/test/MatrixFields/field_names.jl @@ -1,6 +1,4 @@ -import LinearAlgebra: I -import ClimaCore.RecursiveApply: rzero -import ClimaCore.Utilities: replace_type_parameter +import ClimaCore.Utilities: replace_type_parameter, new import ClimaCore.MatrixFields: @name, is_subset_that_covers_set include("matrix_field_test_utils.jl") @@ -11,9 +9,6 @@ end Base.propertynames(::FooFieldName) = (:value,) Base.getproperty(foo::FooFieldName, s::Symbol) = s == :value ? 
getfield(foo, :_value) : error("Invalid property name") -Base.convert(::Type{FooFieldName{T}}, foo::FooFieldName) where {T} = - FooFieldName{T}(foo.value) -Base.zero(::Type{FooFieldName{T}}) where {T} = FooFieldName(zero(T)) get_x() = (; foo = FooFieldName(0), a = (; b = 1, c = ((; d = 2), (;), (3, ())))) @@ -717,7 +712,7 @@ end @testset "FieldNameDict Unit Tests" begin x = get_x() FT = Float64 - x_FT = convert(replace_type_parameter(typeof(x), Int, FT), x) + x_FT = new(replace_type_parameter(typeof(x), Int, FT)) C3 = Geometry.Covariant3Vector{FT} C12 = Geometry.Covariant12Vector{FT} @@ -727,10 +722,10 @@ end CT3XC3 = typeof(zero(CT3) * zero(C3)') C12XCT12 = typeof(zero(C12) * zero(CT12)') CT3XCT12 = typeof(zero(CT3) * zero(CT12)') - x_C12 = rzero(replace_type_parameter(typeof(x), Int, C12)) - x_CT3 = rzero(replace_type_parameter(typeof(x), Int, CT3)) - x_C12XC3 = rzero(replace_type_parameter(typeof(x), Int, C12XC3)) - x_CT3XCT12 = rzero(replace_type_parameter(typeof(x), Int, CT3XCT12)) + x_C12 = new(replace_type_parameter(typeof(x), Int, C12)) + x_CT3 = new(replace_type_parameter(typeof(x), Int, CT3)) + x_C12XC3 = new(replace_type_parameter(typeof(x), Int, C12XC3)) + x_CT3XCT12 = new(replace_type_parameter(typeof(x), Int, CT3XCT12)) I_CT3XC3 = DiagonalMatrixRow(Geometry.AxisTensor(axes(CT3XC3), I)) I_C12XCT12 = DiagonalMatrixRow(Geometry.AxisTensor(axes(C12XCT12), I)) diff --git a/test/MatrixFields/gpu_compat_bidiag_matrix_row.jl b/test/MatrixFields/gpu_compat_bidiag_matrix_row.jl index 3ed6a44319..f6eb090198 100644 --- a/test/MatrixFields/gpu_compat_bidiag_matrix_row.jl +++ b/test/MatrixFields/gpu_compat_bidiag_matrix_row.jl @@ -70,8 +70,6 @@ const ᶜright_bias_matrix = MatrixFields.operator_matrix(ᶜright_bias) one_C3xACT3(::Type{_FT}) where {_FT} = C3(_FT(1)) * CT3(_FT(1))' get_I_u₃(::Type{_FT}) where {_FT} = DiagonalMatrixRow(one_C3xACT3(_FT)) -conv(::Type{_FT}, ᶜbias_matrix) where {_FT} = - convert(BidiagonalMatrixRow{_FT}, ᶜbias_matrix) function foo(c, f) (; 
ᶠtridiagonal_matrix_c3, ᶠu₃, ∂ᶠu₃ʲ_err_∂ᶠu₃ʲ, adj_u₃) = f (; ᶜu₃ʲ, bdmr_l, bdmr_r, bdmr) = c @@ -79,6 +77,7 @@ function foo(c, f) FT = Spaces.undertype(space) I_u₃ = get_I_u₃(FT) dtγ = FT(1) + to_bidiagonal_row = Base.Fix1(convert, BidiagonalMatrixRow{FT}) @. ∂ᶠu₃ʲ_err_∂ᶠu₃ʲ = dtγ * ᶠtridiagonal_matrix_c3 * DiagonalMatrixRow(adjoint(CT3(ᶠu₃))) - @@ -90,14 +89,14 @@ function foo(c, f) @. ᶠtridiagonal_matrix_c3 = -(ᶠgradᵥ_matrix()) * ifelse( ᶜu₃ʲ.components.data.:1 > 0, - convert(BidiagonalMatrixRow{FT}, ᶜleft_bias_matrix()), - convert(BidiagonalMatrixRow{FT}, ᶜright_bias_matrix()), + to_bidiagonal_row(ᶜleft_bias_matrix()), + to_bidiagonal_row(ᶜright_bias_matrix()), ) # However, this can be decomposed into simpler broadcast # expressions that will run on gpus: - @. bdmr_l = convert(BidiagonalMatrixRow{FT}, ᶜleft_bias_matrix()) - @. bdmr_r = convert(BidiagonalMatrixRow{FT}, ᶜright_bias_matrix()) + @. bdmr_l = to_bidiagonal_row(ᶜleft_bias_matrix()) + @. bdmr_r = to_bidiagonal_row(ᶜright_bias_matrix()) @. bdmr = ifelse(ᶜu₃ʲ.components.data.:1 > 0, bdmr_l, bdmr_r) @. ᶠtridiagonal_matrix_c3 = -(ᶠgradᵥ_matrix()) * bdmr diff --git a/test/MatrixFields/matrix_field_test_utils.jl b/test/MatrixFields/matrix_field_test_utils.jl index eab74039f7..364fc3cf39 100644 --- a/test/MatrixFields/matrix_field_test_utils.jl +++ b/test/MatrixFields/matrix_field_test_utils.jl @@ -23,7 +23,6 @@ import ClimaCore: Quadratures using ClimaCore.MatrixFields import ClimaCore.Utilities: half -import ClimaCore.RecursiveApply: ⊠ import LinearAlgebra: I, norm, ldiv!, mul! 
import ClimaCore.MatrixFields: @name @@ -182,7 +181,7 @@ function dycore_prognostic_EDMF_FieldMatrix( ᶠᶜmat2_u₃_scalar = ᶠᶜmat2 .* (e³,) ᶜᶠmat2_scalar_u₃ = ᶜᶠmat2 .* (e₃',) ᶠᶠmat3_u₃_u₃ = ᶠᶠmat3 .* (e³ * e₃',) - ᶜᶠmat2_ρχ_u₃ = map(Base.Fix1(map, Base.Fix2(⊠, ρχ_unit ⊠ e₃')), ᶜᶠmat2) + ᶜᶠmat2_ρχ_u₃ = ᶜᶠmat2 .* (ρχ_unit,) .* (e₃',) ᶜᶜmat3_uₕ_scalar = ᶜᶜmat3 .* (e¹²,) ᶜᶜmat3_uₕ_uₕ = ᶜᶜmat3 .* ( @@ -192,9 +191,9 @@ function dycore_prognostic_EDMF_FieldMatrix( Geometry.Contravariant12Vector(0, 1)', ) ᶜᶠmat2_uₕ_u₃ = ᶜᶠmat2 .* (e¹² * e₃',) - ᶜᶜmat3_ρχ_scalar = map(Base.Fix1(map, Base.Fix2(⊠, ρχ_unit)), ᶜᶜmat3) - ᶜᶜmat3_ρaχ_scalar = map(Base.Fix1(map, Base.Fix2(⊠, ρaχ_unit)), ᶜᶜmat3) - ᶜᶠmat2_ρaχ_u₃ = map(Base.Fix1(map, Base.Fix2(⊠, ρaχ_unit ⊠ e₃')), ᶜᶠmat2) + ᶜᶜmat3_ρχ_scalar = ᶜᶜmat3 .* (ρχ_unit,) + ᶜᶜmat3_ρaχ_scalar = ᶜᶜmat3 .* (ρaχ_unit,) + ᶜᶠmat2_ρaχ_u₃ = ᶜᶠmat2 .* (ρaχ_unit,) .* (e₃',) dry_center_gs_unit = (; ρ = 1, ρe_tot = 1, uₕ = e¹²) center_gs_unit = (; dry_center_gs_unit..., ρatke = 1, ρχ = ρχ_unit) @@ -389,7 +388,7 @@ function random_field(::Type{T}, space) where {T} return field end -# Construct a highly nested type for testing integration with RecursiveApply. +# Construct a nested iterator for testing compatibility with generic data types. nested_type(value) = nested_type(value, value, value) nested_type(value1, value2, value3) = (; a = (), b = value1, c = (value2, (; d = (value3,)), (;))) diff --git a/test/MatrixFields/operator_matrices.jl b/test/MatrixFields/operator_matrices.jl index 725742bb90..e06e811e2d 100644 --- a/test/MatrixFields/operator_matrices.jl +++ b/test/MatrixFields/operator_matrices.jl @@ -36,8 +36,7 @@ import ClimaCore.Operators: GradientF2C, DivergenceC2F, DivergenceF2C, - CurlC2F, - return_eltype + CurlC2F include("matrix_field_test_utils.jl") @@ -87,7 +86,9 @@ function test_op_matrix( # This boundary condition doesn't matter, since it's applied after the # operator. It is zeroed out for simplicity, but it does not need to be. 
boundary_op = if requires_boundary_values - boundary_op_bc = SetValue(rzero(return_eltype(op, args...))) + boundary_op_bc = SetValue( + rzero(eltype(Base.Broadcast.broadcasted(op, args...))), + ) SetBoundaryOperator(; bottom = boundary_op_bc, top = boundary_op_bc) else nothing diff --git a/test/Operators/finitedifference/tensor.jl b/test/Operators/finitedifference/tensor.jl index afac9b459c..c244abda9b 100644 --- a/test/Operators/finitedifference/tensor.jl +++ b/test/Operators/finitedifference/tensor.jl @@ -2,7 +2,6 @@ using Test using ClimaComms using ClimaCore: - Geometry, Domains, Meshes, Topologies, @@ -10,6 +9,7 @@ using ClimaCore: Fields, Operators, Quadratures +using ClimaCore.Geometry using LinearAlgebra for FT in (Float32, Float64) @@ -40,20 +40,17 @@ for FT in (Float32, Float64) ) end - ∇ᵥuvw_boundary = Geometry.outer( - Geometry.WVector(FT(1)), - Geometry.UVWVector(FT(1), FT(2), FT(3)), - ) + ∇ᵥuvw_boundary = + Geometry.WVector(FT(1)) ⊗ Geometry.UVWVector(FT(1), FT(2), FT(3)) gradc2f = Operators.GradientC2F( bottom = Operators.SetGradient(∇ᵥuvw_boundary), top = Operators.SetGradient(∇ᵥuvw_boundary), ) ∇ᵥuvw = Geometry.project.(Ref(Geometry.UVWAxis()), gradc2f.(uvw)) - ∇ᵥuvw_scalar = Geometry.outer( - Geometry.UVWVector(FT(0), FT(0), FT(1)), - Geometry.UVWVector(FT(1), FT(2), FT(3)), - ) + ∇ᵥuvw_scalar = + Geometry.UVWVector(FT(0), FT(0), FT(1)) ⊗ + Geometry.UVWVector(FT(1), FT(2), FT(3)) ∇ᵥuvw_ref = fill(∇ᵥuvw_scalar, fspace) @test ∇ᵥuvw ≈ ∇ᵥuvw_ref diff --git a/test/Operators/finitedifference/unit_column.jl b/test/Operators/finitedifference/unit_column.jl index 4a8ce8cfd7..deb6bf0fb7 100644 --- a/test/Operators/finitedifference/unit_column.jl +++ b/test/Operators/finitedifference/unit_column.jl @@ -203,17 +203,6 @@ end # test that broadcasting into incorrect field space throws an error empty_faces = zeros(FT, face_space) @test_throws Exception empty_faces .= ∂.(w .* I.(θ)) - - # 5) we set boundaries on neither - I = Operators.InterpolateC2F() - ∂ = 
Operators.GradientF2C() - - # TODO: should we throw something else? - if are_boundschecks_forced && !(device isa ClimaComms.CUDADevice) - @test_throws BoundsError ∂.(w .* I.(θ)) - else - @warn "Bounds check on BoundsError ∂.(w .* I.(θ)) not verified." - end end end diff --git a/test/Operators/finitedifference/unit_fd_ops_shared_memory.jl b/test/Operators/finitedifference/unit_fd_ops_shared_memory.jl index f46e5c84f4..79eba774fb 100644 --- a/test/Operators/finitedifference/unit_fd_ops_shared_memory.jl +++ b/test/Operators/finitedifference/unit_fd_ops_shared_memory.jl @@ -89,7 +89,7 @@ end top = Operators.Extrapolate(), ) bc = @. lazy(div(Geometry.WVector(ᶠwinterp(ϕ, ρ)))) - test_center_windows(bc) + test_face_windows(bc) # highly nested cases ᶜinterp = Operators.InterpolateF2C() ᶠinterp = Operators.InterpolateC2F( diff --git a/test/Operators/integrals.jl b/test/Operators/integrals.jl index 59f3d91669..4c113c3e42 100644 --- a/test/Operators/integrals.jl +++ b/test/Operators/integrals.jl @@ -35,9 +35,7 @@ function test_column_integral_definite!(center_space) ᶜz = Fields.coordinate_field(center_space).z ᶠz = Fields.coordinate_field(face_space).z z_top = Fields.level(ᶠz, Operators.right_idx(face_space)) - ᶜu = Base.Broadcast.broadcasted(ᶜz) do z - (; one = one(z), powers = (z, z^2, z^3)) - end + ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) ∫u_ref = map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), z_top) ∫u_test = similar(∫u_ref) @@ -57,9 +55,7 @@ function test_column_integral_indefinite!(center_space) face_space = center_to_face_space(center_space) ᶜz = Fields.coordinate_field(center_space).z ᶠz = Fields.coordinate_field(face_space).z - ᶜu = Base.Broadcast.broadcasted(ᶜz) do z - (; one = one(z), powers = (z, z^2, z^3)) - end + ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) ᶠ∫u_ref = map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), ᶠz) ᶠ∫u_test = similar(ᶠ∫u_ref) diff --git a/test/Operators/spectralelement/opt.jl 
b/test/Operators/spectralelement/opt.jl index c2b93468f5..1074ce57f2 100644 --- a/test/Operators/spectralelement/opt.jl +++ b/test/Operators/spectralelement/opt.jl @@ -61,14 +61,8 @@ function opt_WeakDivergence(field) return wdiv.(field) end -function opt_ScalarDSS(field) - Spaces.weighted_dss!(@. opt_Gradient(field)) - return grad -end - -function opt_VectorDss_Curl(field) - return Spaces.weighted_dss!(@. opt_Curl(field)) -end +opt_ScalarDSS(field) = Spaces.weighted_dss!(opt_Gradient(field)) +opt_VectorDss_Curl(field) = Spaces.weighted_dss!(opt_Curl(field)) function opt_VectorDss_DivGrad(field) sdiv = Operators.Divergence() diff --git a/test/RecursiveApply/unit_recursive_apply.jl b/test/RecursiveApply/unit_recursive_apply.jl deleted file mode 100644 index 1ccc7cf49f..0000000000 --- a/test/RecursiveApply/unit_recursive_apply.jl +++ /dev/null @@ -1,100 +0,0 @@ -using JET -using Test - -using ClimaCore.RecursiveApply -using ClimaCore.Geometry - -@static if @isdefined(var"@test_opt") # v1.7 and higher - @testset "RecursiveApply optimization test" begin - for x in [ - 1.0, - 1.0f0, - (1.0, 2.0), - (1.0f0, 2.0f0), - (a = 1.0, b = (x1 = 2.0, x2 = 3.0)), - (a = 1.0f0, b = (x1 = 2.0f0, x2 = 3.0f0)), - ] - @test_opt 2 ⊠ x - @test_opt x ⊞ x - @test_opt RecursiveApply.rdiv(x, 3) - end - end -end - -@testset "RecursiveApply nary ops" begin - for x in [ - 1.0, - 1.0f0, - (1.0, 2.0), - (1.0f0, 2.0f0), - (a = 1.0, b = (x1 = 2.0, x2 = 3.0)), - (a = 1.0f0, b = (x1 = 2.0f0, x2 = 3.0f0)), - ] - FT = eltype(x[1]) - @test RecursiveApply.rmul(x, one(FT), one(FT), one(FT)) == x - @test RecursiveApply.rmul(x, one(FT), x, one(FT)) == - RecursiveApply.rmul(x, x) - @test RecursiveApply.radd(x, zero(FT), zero(FT), zero(FT)) == x - @test RecursiveApply.radd(x, zero(FT), x, zero(FT)) == - RecursiveApply.rmul(x, FT(2)) - end -end - -@testset "Highly nested types" begin - FT = Float64 - nested_types = [ - FT, - Tuple{FT, FT}, - NamedTuple{(:ϕ, :ψ), Tuple{FT, FT}}, - Tuple{ - NamedTuple{(:ϕ, :ψ), 
Tuple{FT, FT}}, - NamedTuple{(:ϕ, :ψ), Tuple{FT, FT}}, - }, - Tuple{FT, FT}, - NamedTuple{ - (:ρ, :uₕ, :ρe_tot, :ρq_tot, :sgs⁰, :sgsʲs), - Tuple{ - FT, - Tuple{FT, FT}, - FT, - FT, - NamedTuple{(:ρatke,), Tuple{FT}}, - Tuple{NamedTuple{(:ρa, :ρae_tot, :ρaq_tot), Tuple{FT, FT, FT}}}, - }, - }, - NamedTuple{ - (:u₃, :sgsʲs), - Tuple{Tuple{FT}, Tuple{NamedTuple{(:u₃,), Tuple{Tuple{FT}}}}}, - }, - ] - for nt in nested_types - rz = RecursiveApply.rmap(RecursiveApply.rzero, nt) - @test typeof(rz) == nt - @inferred RecursiveApply.rmap(RecursiveApply.rzero, nt) - - rz = RecursiveApply.rmap((x, y) -> RecursiveApply.rzero(x), nt, nt) - @test typeof(rz) == nt - @inferred RecursiveApply.rmap((x, y) -> RecursiveApply.rzero(x), nt, nt) - - rz = RecursiveApply.rmaptype(identity, nt) - @test rz == nt - @inferred RecursiveApply.rmaptype(zero, nt) - - rz = RecursiveApply.rmaptype((x, y) -> identity(x), nt, nt) - @test rz == nt - @inferred RecursiveApply.rmaptype((x, y) -> zero(x), nt, nt) - end -end - -@testset "NamedTuples and axis tensors" begin - FT = Float64 - nt = (; a = FT(1), b = FT(2)) - uv = Geometry.UVVector(FT(1), FT(2)) - rz = RecursiveApply.rmap(*, nt, uv) - @test typeof(rz) == NamedTuple{(:a, :b), Tuple{UVVector{FT}, UVVector{FT}}} - @inferred RecursiveApply.rmap(*, nt, uv) - @test rz.a.u == 1 - @test rz.a.v == 2 - @test rz.b.u == 2 - @test rz.b.v == 4 -end diff --git a/test/Spaces/ddss1_cs.jl b/test/Spaces/ddss1_cs.jl index 3fe1fa29c3..465408f754 100644 --- a/test/Spaces/ddss1_cs.jl +++ b/test/Spaces/ddss1_cs.jl @@ -78,27 +78,6 @@ end Spaces.weighted_dss!(x) @test Array(parent(x)) ≈ ones(size(parent(x))) # TODO: improve the quality of this test - - wrong_field = map(Fields.coordinate_field(space)) do cf - (; a = Float64(0)) - end - wrong_buffer = Spaces.create_dss_buffer(wrong_field) - @test_throws ErrorException("Incorrect buffer eltype") Spaces.weighted_dss!( - x, - wrong_buffer, - ) - @test_throws ErrorException("Incorrect buffer eltype") 
Spaces.weighted_dss_start!( - x, - wrong_buffer, - ) - @test_throws ErrorException("Incorrect buffer eltype") Spaces.weighted_dss_internal!( - x, - wrong_buffer, - ) - @test_throws ErrorException("Incorrect buffer eltype") Spaces.weighted_dss_ghost!( - x, - wrong_buffer, - ) end @testset "DSS of Covariant12Vector & Covariant123Vector on extruded Cubed Sphere mesh (ne = 3, serial run)" begin diff --git a/test/Utilities/unit_auto_broadcaster.jl b/test/Utilities/unit_auto_broadcaster.jl new file mode 100644 index 0000000000..4c1ac55efd --- /dev/null +++ b/test/Utilities/unit_auto_broadcaster.jl @@ -0,0 +1,54 @@ +using Test + +using ClimaCore.Utilities: add_auto_broadcasters, nested_broadcast +using ClimaCore.Geometry: UVVector + +@testset "Simple AutoBroadcasters" begin + for itr in (1, (1, 2), (a = 1, b = (c = 2, d = 3))) + x = @inferred add_auto_broadcasters(itr) + 0 + 0 + 0 + y = @inferred add_auto_broadcasters(itr) + 0 + itr + 0 + @test x + itr === y + + x = @inferred add_auto_broadcasters(itr) * 1 * 1 * 1 + y = @inferred add_auto_broadcasters(itr) * 1 * itr * 1 + @test x * itr === y + end +end + +@testset "AutoBroadcasters of AxisTensors" begin + x = @inferred add_auto_broadcasters((; a = 1, b = 2)) * UVVector(1, 2) + y = @inferred add_auto_broadcasters((; a = UVVector(1, 2), b = UVVector(2, 4))) + @test x === y +end + +@testset "Highly nested AutoBroadcasters" begin + FT = Float64 + for T in ( + typeof(∘(ntuple(Returns(tup -> (tup,)), 20)...)(zero(FT))), + typeof(∘(ntuple(Returns(tup -> (tup, tup)), 10)...)(zero(FT))), + typeof(∘(ntuple(Returns(tup -> (tup, tup, tup)), 5)...)(zero(FT))), + NamedTuple{ + (:ρ, :uₕ, :ρe_tot, :ρq_tot, :sgs⁰, :sgsʲs), + Tuple{ + FT, + Tuple{FT, FT}, + FT, + FT, + NamedTuple{(:ρatke,), Tuple{FT}}, + Tuple{NamedTuple{(:ρa, :ρae_tot, :ρaq_tot), Tuple{FT, FT, FT}}}, + }, + }, # similar to the prognostic state used in ClimaAtmos.jl + ) + X = @inferred add_auto_broadcasters(T) + @test zero(X) isa X + for x in ( + (@inferred zero(X)), + 
(@inferred FT(Integer(zero(X)))), + (@inferred min(Integer(zero(X)), cos(zero(X)), abs(eps(X)))), + (@inferred nested_broadcast(Returns(-), zero(X))(Int(one(X)), one(FT))), + (@inferred nested_broadcast(Returns(zero(FT)), ntuple(Returns(one(X)), 40)...)), + ) + @test x === zero(X) + end + end +end diff --git a/test/aqua.jl b/test/aqua.jl index b748e8f463..351e801f59 100644 --- a/test/aqua.jl +++ b/test/aqua.jl @@ -20,7 +20,7 @@ using Aqua # then please lower the limit based on the new number of ambiguities. # We're trying to drive this number down to zero to reduce latency. # Uncomment for debugging: - n_existing_ambiguities = 25 + n_existing_ambiguities = 23 if !(length(ambs) ≤ n_existing_ambiguities) for method_ambiguity in ambs @show method_ambiguity diff --git a/test/runtests.jl b/test/runtests.jl index d993837649..273ae3bda8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -15,8 +15,8 @@ UnitTest("DataLayouts get/set_index_field" ,"DataLayouts/unit_cartesian_ UnitTest("DataLayouts has_uniform_datalayouts" ,"DataLayouts/unit_has_uniform_datalayouts.jl"), UnitTest("DataLayouts non_extruded_broadcast" ,"DataLayouts/unit_non_extruded_broadcast.jl"), UnitTest("DataLayouts linear indexing" ,"DataLayouts/unit_linear_indexing.jl"), -UnitTest("Recursive" ,"RecursiveApply/unit_recursive_apply.jl"), UnitTest("PlusHalf" ,"Utilities/unit_plushalf.jl"), +UnitTest("AutoBroadcaster" ,"Utilities/unit_auto_broadcaster.jl"), UnitTest("DataLayouts 0D" ,"DataLayouts/data0d.jl"), UnitTest("DataLayouts 1D" ,"DataLayouts/data1d.jl"), UnitTest("DataLayouts 2D" ,"DataLayouts/data2d.jl"), @@ -24,7 +24,7 @@ UnitTest("DataLayouts 1dx" ,"DataLayouts/data1dx.jl"), UnitTest("DataLayouts 2dx" ,"DataLayouts/data2dx.jl"), UnitTest("DataLayouts mapreduce" ,"DataLayouts/unit_mapreduce.jl"), UnitTest("Geometry" ,"Geometry/geometry.jl"), -UnitTest("rmul_with_projection" ,"Geometry/rmul_with_projection.jl"), +UnitTest("mul_with_projection" ,"Geometry/mul_with_projection.jl"), 
UnitTest("AxisTensors" ,"Geometry/axistensors.jl"), UnitTest("Interval mesh" ,"Meshes/interval.jl"), UnitTest("Rectangle mesh" ,"Meshes/rectangle.jl"),