Skip to content

Commit

Permalink
Make benchmarks run on the GPU (#77)
Browse files Browse the repository at this point in the history
* Make WCSPH benchmark run on the GPU

* Add FP32 WCSPH benchmark

* Add GPU benchmark of N-Body as well

* Fix typo
  • Loading branch information
efaulhaber authored Nov 14, 2024
1 parent bc64b76 commit 26c98ba
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 10 deletions.
14 changes: 10 additions & 4 deletions benchmarks/n_body.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,15 @@ This is a more realistic benchmark for particle-based simulations than
However, due to the higher computational cost, differences between neighborhood search
implementations are less pronounced.
"""
function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
mass = 1e10 * (rand(size(coordinates, 2)) .+ 1)
function benchmark_n_body(neighborhood_search, coordinates_; parallel = true)
# Passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
# Passing `parallel = true` or `parallel = false` will not change anything here.
coordinates = PointNeighbors.Adapt.adapt(parallel, coordinates_)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)

# This preserves the data type of `coordinates`, which makes it work for GPU types
mass = 1e10 * (rand!(similar(coordinates, size(coordinates, 2))) .+ 1)
G = 6.6743e-11

dv = similar(coordinates)
Expand All @@ -36,6 +43,5 @@ function benchmark_n_body(neighborhood_search, coordinates; parallel = true)
return dv
end

return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G,
$neighborhood_search, $parallel)
return @belapsed $compute_acceleration!($dv, $coordinates, $mass, $G, $nhs, $parallel)
end
76 changes: 70 additions & 6 deletions benchmarks/smoothed_particle_hydrodynamics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,80 @@ function benchmark_wcsph(neighborhood_search, coordinates; parallel = true)
smoothing_length, viscosity = viscosity,
density_diffusion = density_diffusion)

v = vcat(fluid.velocity, fluid.density')
u = copy(fluid.coordinates)
# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(fluid_system, neighborhood_search)
TrixiParticles.compute_pressure!(fluid_system, v)
TrixiParticles.initialize!(system, nhs)
TrixiParticles.compute_pressure!(system, v)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $neighborhood_search,
$fluid_system, $fluid_system)
return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
end

"""
benchmark_wcsph_fp32(neighborhood_search, coordinates; parallel = true)
Like [`benchmark_wcsph`](@ref), but using single precision floating point numbers.
"""
function benchmark_wcsph_fp32(neighborhood_search, coordinates_; parallel = true)
coordinates = convert(Matrix{Float32}, coordinates_)
density = 1000.0f0
fluid = InitialCondition(; coordinates, density, mass = 0.1f0)

# Compact support == smoothing length for the Wendland kernel
smoothing_length = convert(Float32, PointNeighbors.search_radius(neighborhood_search))
if ndims(neighborhood_search) == 1
smoothing_kernel = SchoenbergCubicSplineKernel{1}()
else
smoothing_kernel = WendlandC2Kernel{ndims(neighborhood_search)}()
end

sound_speed = 10.0f0
state_equation = StateEquationCole(; sound_speed, reference_density = density,
exponent = 1)

fluid_density_calculator = ContinuityDensity()
viscosity = ArtificialViscosityMonaghan(alpha = 0.02f0, beta = 0.0f0)
density_diffusion = DensityDiffusionMolteniColagrossi(delta = 0.1f0)

fluid_system = WeaklyCompressibleSPHSystem(fluid, fluid_density_calculator,
state_equation, smoothing_kernel,
smoothing_length, viscosity = viscosity,
acceleration = (0.0f0, 0.0f0, 0.0f0),
density_diffusion = density_diffusion)

# Note that we cannot just disable parallelism in TrixiParticles.
# But passing a different backend like `CUDA.CUDABackend`
# allows us to change the type of the array to run the benchmark on the GPU.
if parallel isa Bool
system = fluid_system
nhs = neighborhood_search
else
system = PointNeighbors.Adapt.adapt(parallel, fluid_system)
nhs = PointNeighbors.Adapt.adapt(parallel, neighborhood_search)
end

v = PointNeighbors.Adapt.adapt(parallel, vcat(fluid.velocity, fluid.density'))
u = PointNeighbors.Adapt.adapt(parallel, coordinates)
dv = zero(v)

# Initialize the system
TrixiParticles.initialize!(system, nhs)
TrixiParticles.compute_pressure!(system, v)

return @belapsed TrixiParticles.interact!($dv, $v, $u, $v, $u, $nhs, $system, $system)
end

"""
Expand Down

0 comments on commit 26c98ba

Please sign in to comment.