Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
233283a
Start stress test script
petebachant Mar 17, 2026
7ff3f12
Update bm
petebachant Mar 17, 2026
6a0e703
Add alt script
petebachant Mar 17, 2026
b0f57cd
Update alt script to detect when register spills happen
petebachant Mar 17, 2026
f88b76d
Update alt script to do more ops
petebachant Mar 17, 2026
5090ff5
Get all to run
petebachant Mar 17, 2026
6b5ce5e
Update script
petebachant Mar 17, 2026
bc8cf2a
Get all stress tests working
petebachant Mar 18, 2026
d98a177
Add reporting
petebachant Mar 19, 2026
1bee25d
Fix formatting
petebachant Mar 19, 2026
b6b5d24
Merge branches 'pb/stress-tests' and 'main' of github.com:CliMA/Clima…
petebachant Mar 23, 2026
7f0e499
Use struct for result
petebachant Mar 23, 2026
a62ef82
Remove alt script
petebachant Mar 23, 2026
b69d63f
Remove changes from cuda ext
petebachant Mar 23, 2026
2e2b80f
Revert "Remove changes from cuda ext"
petebachant Mar 23, 2026
d63fd43
Clean up
petebachant Mar 23, 2026
5651dd6
Address some PR comments
petebachant Mar 30, 2026
1a7e409
Add test
petebachant Mar 30, 2026
7ae10ad
Update stress test some more
petebachant Mar 31, 2026
3b6b5c8
Merge in main
petebachant Apr 6, 2026
fdb0002
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/stress-t…
petebachant Apr 6, 2026
84eabc0
Add complexity
petebachant Apr 6, 2026
addada9
Update stress test to be fewer cases
petebachant Apr 7, 2026
21cd850
Improve soft fail criteria
petebachant Apr 7, 2026
54e3df8
Improve soft-fail false positives
petebachant Apr 8, 2026
033cf6a
Document output options
petebachant Apr 8, 2026
0e008bf
rm files
petebachant Apr 8, 2026
094df5d
Add compile mode to stress test
petebachant Apr 20, 2026
21fc91b
Add stress test cases
petebachant Apr 20, 2026
8c6ef87
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/stress-t…
petebachant Apr 20, 2026
020c416
Add lazy broadcast tree exprs
petebachant Apr 20, 2026
5d59acc
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/stress-t…
petebachant Apr 20, 2026
67e578a
Remove failing complexities
petebachant Apr 21, 2026
84f5d73
Add compiler stress tests
petebachant Apr 21, 2026
0dcafde
Revert env var change in cuda_utils.jl
petebachant Apr 21, 2026
ee74a63
Gate on version
petebachant Apr 21, 2026
54ea1b8
Add docstring for generate_field_test_code
petebachant Apr 27, 2026
22bd875
Merge branch 'main' of github.com:CliMA/ClimaCore.jl into pb/stress-t…
petebachant Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 28 additions & 10 deletions ext/cuda/cuda_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,17 @@ const reported_stats = Dict()
const kernel_names = IdDict()
# Call via ClimaCore.DataLayouts.empty_kernel_stats()
# Empties the module-level `reported_stats` dictionary for CUDA devices.
empty_kernel_stats(::ClimaComms.CUDADevice) = empty!(reported_stats)
# NOTE(review): `collect_kernel_stats()` appears twice in a row with the same
# signature. This looks like the removed/added pair of a diff hunk rather than
# two intended definitions — confirm which single definition belongs here.
collect_kernel_stats() = false
# Delegates to `_getenv_bool` for the `CLIMA_COLLECT_CUDA_KERNEL_STATS`
# environment variable, passing `false` as the default.
collect_kernel_stats() = _getenv_bool("CLIMA_COLLECT_CUDA_KERNEL_STATS"; default = false)

"""
    _memory_bytes(memory, key::Symbol)

Return the entry of `memory` named `key`, converted to `Int`.

The entry is looked up as a property first. If `memory` is a `NamedTuple` that
does not expose `key` as a property, it is additionally probed with `haskey`.
When neither lookup finds `key`, `0` is returned.
"""
function _memory_bytes(memory, key::Symbol)
    # Property access covers structs and named tuples alike.
    hasproperty(memory, key) && return Int(getproperty(memory, key))
    # Fallback kept for named tuples that are indexed by key.
    if memory isa NamedTuple && haskey(memory, key)
        return Int(memory[key])
    end
    # Nothing recorded under `key`.
    return 0
end

# Robustly parse boolean-like environment variables
function _getenv_bool(var::AbstractString; default::Bool = false)
Expand Down Expand Up @@ -171,26 +181,34 @@ function auto_launch!(
# CUDA.registers(kernel) > 50 || return nothing # for debugging
# occursin("single_field_solve_kernel", string(nameof(F!))) || return nothing
if !haskey(reported_stats, key)
@assert !isnothing(nitems)
kernel = CUDA.@cuda always_inline = true launch = false f!(args...)
config = CUDA.launch_configuration(kernel.fun)
threads = min(nitems, config.threads)
blocks = cld(nitems, threads)
threads = isnothing(nitems) ? nothing : min(nitems, config.threads)
blocks = isnothing(nitems) ? nothing : cld(nitems, threads)
# For now, let's just collect info, later we can benchmark
#! format: off
s = ""
s *= "Launching kernel $f! with following config:\n"
s *= " nitems: $(nitems)\n"
nitems_str = isnothing(nitems) ? "unknown" : string(nitems)
s *= " nitems: $(nitems_str)\n"
isnothing(threads_s) || (s *= " threads_s: $(threads_s)\n")
isnothing(blocks_s) || (s *= " blocks_s: $(blocks_s)\n")
s *= " threads: $(threads)\n"
s *= " blocks: $(blocks)\n"
isnothing(threads_s) || (s *= " Δthreads: $(threads - prod(threads_s))\n")
isnothing(blocks_s) || (s *= " Δblocks: $(blocks - prod(blocks_s))\n")
isnothing(threads) || (s *= " threads: $(threads)\n")
isnothing(blocks) || (s *= " blocks: $(blocks)\n")
(isnothing(threads_s) || isnothing(threads)) || (s *= " Δthreads: $(threads - prod(threads_s))\n")
(isnothing(blocks_s) || isnothing(blocks)) || (s *= " Δblocks: $(blocks - prod(blocks_s))\n")
s *= " maxthreads: $(CUDA.maxthreads(kernel))\n"
s *= " registers: $(CUDA.registers(kernel))\n"
isnothing(threads_s) || ( s *= " threads_s_frac: $(prod(threads_s)/CUDA.maxthreads(kernel))\n")
s *= " memory: $(CUDA.memory(kernel))\n"
memory = CUDA.memory(kernel)
local_bytes = _memory_bytes(memory, :local)
shared_bytes = _memory_bytes(memory, :shared)
const_bytes = _memory_bytes(memory, :constant)
s *= " memory: $(memory)\n"
s *= " CUDA_PROFILE: kernel=$(something(kernel_name, nameof(F!))) " *
"registers=$(CUDA.registers(kernel)) " *
"local=$(local_bytes) shared=$(shared_bytes) constant=$(const_bytes) " *
"maxthreads=$(CUDA.maxthreads(kernel))\n"
@info s
#! format: on
reported_stats[key] = true
Expand Down
Loading
Loading