diff --git a/.github/workflows/gpu-test.sbatch b/.github/workflows/gpu-test.sbatch
new file mode 100644
index 000000000..746247b1e
--- /dev/null
+++ b/.github/workflows/gpu-test.sbatch
@@ -0,0 +1,105 @@
+#!/bin/bash
+#!
+#! Example SLURM job script for Wilkes3 (AMD EPYC 7763, ConnectX-6, A100)
+#! Last updated: Fri 30 Jul 11:07:58 BST 2021
+#!
+
+#!#############################################################
+#!#### Modify the options in this section as appropriate ######
+#!#############################################################
+
+#! sbatch directives begin here ###############################
+#! Name of the job:
+#SBATCH -J gpujob
+#! Which project should be charged (NB Wilkes2 projects end in '-GPU'):
+#SBATCH -A taylor-sl3-gpu
+#! How many whole nodes should be allocated?
+#SBATCH --nodes=1
+#! How many (MPI) tasks will there be in total?
+#! Note probably this should not exceed the total number of GPUs in use.
+#SBATCH --ntasks=1
+#! Specify the number of GPUs per node (between 1 and 4; must be 4 if nodes>1).
+#! Note that the job submission script will enforce no more than 32 cpus per GPU.
+#SBATCH --gres=gpu:1
+#! How much wallclock time will be required?
+#SBATCH --time=02:00:00
+#! What types of email messages do you wish to receive?
+#SBATCH --mail-type=NONE
+#! Uncomment this to prevent the job from being requeued (e.g. if
+#! interrupted by node failure or system downtime):
+##SBATCH --no-requeue
+
+#! Do not change:
+#SBATCH -p ampere
+
+#! sbatch directives end here (put any additional directives above this line)
+
+#! Notes:
+#! Charging is determined by GPU number*walltime.
+
+#! Number of nodes and tasks per node allocated by SLURM (do not change):
+numnodes=$SLURM_JOB_NUM_NODES
+numtasks=$SLURM_NTASKS
+mpi_tasks_per_node=$(echo "$SLURM_TASKS_PER_NODE" | sed -e 's/^\([0-9][0-9]*\).*$/\1/')
+#! ############################################################
+#! Modify the settings below to specify the application's environment, location
+#! and launch method:
+
+#! Optionally modify the environment seen by the application
+#! (note that SLURM reproduces the environment at submission irrespective of ~/.bashrc):
+. /etc/profile.d/modules.sh                # Leave this line (enables the module command)
+module purge                               # Removes all modules still loaded
+module load rhel8/default-amp              # REQUIRED - loads the basic environment
+
+#! Insert additional module load commands after this line if needed:
+
+#! Full path to application executable:
+#! TODO - workout how to make version get passed to this script
+application="/home/js2430/rds/hpc-work/OceanBioME-runner/_work/_temp/julia-1.9.3/bin/julia -O0 --color=yes --project test/gpu_runtests.jl"
+
+#! Run options for the application:
+options=""
+
+#! Work directory (i.e. where the job will run):
+workdir="$SLURM_SUBMIT_DIR"  # The value of SLURM_SUBMIT_DIR sets workdir to the directory
+                             # in which sbatch is run.
+
+#! Are you using OpenMP (NB this is unrelated to OpenMPI)? If so increase this
+#! safe value to no more than 128:
+export OMP_NUM_THREADS=1
+
+#! Number of MPI tasks to be started by the application per node and in total (do not change):
+np=$(( numnodes * mpi_tasks_per_node ))
+
+#! Choose this for a pure shared-memory OpenMP parallel program on a single node:
+#! (OMP_NUM_THREADS threads will be created):
+CMD="$application $options"
+
+
+###############################################################
+### You should not have to change anything below this line ####
+###############################################################
+
+cd "$workdir" || exit 1
+echo -e "Changed directory to `pwd`.\n"
+
+JOBID=$SLURM_JOB_ID
+
+echo -e "JobID: $JOBID\n======"
+echo "Time: `date`"
+echo "Running on master node: `hostname`"
+echo "Current directory: `pwd`"
+
+if [ "$SLURM_JOB_NODELIST" ]; then
+        #! Create a machine file:
+        export NODEFILE=`generate_pbs_nodefile`
+        cat "$NODEFILE" | uniq > "machine.file.$JOBID"
+        echo -e "\nNodes allocated:\n================"
+        echo `cat "machine.file.$JOBID" | sed -e 's/\..*$//g'`
+fi
+
+echo -e "\nnumtasks=$numtasks, numnodes=$numnodes, mpi_tasks_per_node=$mpi_tasks_per_node (OMP_NUM_THREADS=$OMP_NUM_THREADS)"
+
+echo -e "\nExecuting command:\n==================\n$CMD\n"
+
+eval "$CMD"
diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
new file mode 100644
index 000000000..8869261d6
--- /dev/null
+++ b/.github/workflows/gpu-tests.yml
@@ -0,0 +1,43 @@
+name: CSD3 tests
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+env:
+  DATADEPS_ALWAYS_ACCEPT: true
+  JULIA_VERSION: "1.9.3"
+  JULIA_MINOR_VERSION: "1.9"
+  # Must match the Julia path hard-coded in gpu-test.sbatch.
+  RDS_HOME: "/home/js2430/rds/hpc-work/OceanBioME-runner/_work"
+
+jobs:
+  test:
+    name: Deploy CSD3 tests
+    runs-on: self-hosted
+    steps:
+      # The sbatch script and the tests live in the repository.
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Initialize GPU environment
+        run: |
+          wget -N -P "$RDS_HOME/_temp" "https://julialang-s3.julialang.org/bin/linux/x64/$JULIA_MINOR_VERSION/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
+          tar xf "$RDS_HOME/_temp/julia-$JULIA_VERSION-linux-x86_64.tar.gz" -C "$RDS_HOME/_temp"
+
+      # --wait blocks until the job finishes and propagates its exit code, so
+      # the step fails when the tests fail and clean-up cannot run too early.
+      - name: Run the tests on SLURM
+        run: sbatch --wait .github/workflows/gpu-test.sbatch
+
+      - name: Clean up
+        if: always()
+        run: rm -rf "$RDS_HOME/_temp/julia-$JULIA_VERSION"
diff --git a/test/gpu_runtests.jl b/test/gpu_runtests.jl
new file mode 100644
index 000000000..18cc64848
--- /dev/null
+++ b/test/gpu_runtests.jl
@@ -0,0 +1,16 @@
+# Entry point for the GPU test run launched by .github/workflows/gpu-test.sbatch.
+using Pkg
+
+Pkg.instantiate()
+
+# `Pkg.add` takes a vector of names; it has no method for several positional strings.
+Pkg.add(["Test", "CUDA", "DataDeps", "Documenter", "Statistics", "JLD2"])
+
+Pkg.precompile()
+
+using Oceananigans
+
+# runtests.jl only falls back to CPU() when `arch` is not already defined.
+arch = GPU()
+
+include("runtests.jl")
diff --git a/test/runtests.jl b/test/runtests.jl
index 8dff45c8d..11ed54869 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,5 +1,12 @@
 using OceanBioME, Documenter, Test
 
+using Oceananigans: CPU
+
+# A wrapper script (e.g. gpu_runtests.jl) may define `arch` before including this file.
+if !(@isdefined arch)
+    arch = CPU()
+end
+
 include("test_utils.jl")
 include("test_light.jl")
 include("test_LOBSTER.jl")
@@ -8,6 +15,9 @@ include("test_gasexchange.jl")
 include("test_slatissima.jl")
 include("test_sediments.jl")
 
-@testset "Doctests" begin
-    doctest(OceanBioME)
-end
+# Doctests are only run for the CPU architecture.
+if arch isa CPU
+    @testset "Doctests" begin
+        doctest(OceanBioME)
+    end
+end
diff --git a/test/test_LOBSTER.jl b/test/test_LOBSTER.jl
index db25b6d49..25b8e5079 100644
--- a/test/test_LOBSTER.jl
+++ b/test/test_LOBSTER.jl
@@ -114,14 +114,13 @@ end
 
 n_timesteps = 100
 
-for arch in (CPU(), )
-    grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 2))
-    for open_bottom = (false, true), sinking = (false, true), variable_redfield = (false, true), oxygen = (false, true), carbonates = (false, true)
-        if !(sinking && open_bottom) # no sinking is the same with and without open bottom
-            @info "Testing on $(typeof(arch)) with carbonates $(carbonates ? :✅ : :❌), oxygen $(oxygen ? :✅ : :❌), variable redfield $(variable_redfield ? :✅ : :❌), sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
-            @testset "$arch, $carbonates, $oxygen, $variable_redfield, $sinking, $open_bottom" begin
-                test_LOBSTER(grid, carbonates, oxygen, variable_redfield, sinking, open_bottom, n_timesteps)
-            end
+grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 2))
+
+for open_bottom = (false, true), sinking = (false, true), variable_redfield = (false, true), oxygen = (false, true), carbonates = (false, true)
+    if !(sinking && open_bottom) # no sinking is the same with and without open bottom
+        @info "Testing on $(typeof(arch)) with carbonates $(carbonates ? :✅ : :❌), oxygen $(oxygen ? :✅ : :❌), variable redfield $(variable_redfield ? :✅ : :❌), sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
+        @testset "$arch, $carbonates, $oxygen, $variable_redfield, $sinking, $open_bottom" begin
+            test_LOBSTER(grid, carbonates, oxygen, variable_redfield, sinking, open_bottom, n_timesteps)
         end
     end
 end
\ No newline at end of file
diff --git a/test/test_NPZD.jl b/test/test_NPZD.jl
index 465d41266..68a90a531 100644
--- a/test/test_NPZD.jl
+++ b/test/test_NPZD.jl
@@ -47,14 +47,13 @@ function test_NPZD(grid, sinking, open_bottom)
     return nothing
 end
 
-for arch in (CPU(), )
-    grid = RectilinearGrid(arch; size=(3, 3, 6), extent=(1, 1, 2))
-    for sinking = (false, true), open_bottom = (false, true)
-        if !(sinking && open_bottom) # no sinking is the same with and without open bottom
-            @info "Testing on $(typeof(arch)) with sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
-            @testset "$arch, $sinking, $open_bottom" begin
-                test_NPZD(grid, sinking, open_bottom)
-            end
+grid = RectilinearGrid(arch; size=(3, 3, 6), extent=(1, 1, 2))
+
+for sinking = (false, true), open_bottom = (false, true)
+    if !(sinking && open_bottom) # no sinking is the same with and without open bottom
+        @info "Testing on $(typeof(arch)) with sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
+        @testset "$arch, $sinking, $open_bottom" begin
+            test_NPZD(grid, sinking, open_bottom)
         end
     end
 end
diff --git a/test/test_light.jl b/test/test_light.jl
index fdcf99169..bef511c66 100644
--- a/test/test_light.jl
+++ b/test/test_light.jl
@@ -42,11 +42,9 @@ function test_two_band(grid, bgc, model_type)
     return all(results_PAR .≈ reverse(expected_PAR))
 end
 
-archs = (CPU(), )
 @testset "Light attenuaiton model" begin
     for model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel),
-        arch in archs,
         grid in (RectilinearGrid(arch; size = (2, 2, 2), extent = (2, 2, 2)),
                  LatitudeLongitudeGrid(arch; size = (5, 5, 2), longitude = (-180, 180), latitude = (-85, 85), z = (-2, 0))),
         bgc in (LOBSTER, NutrientPhytoplanktonZooplanktonDetritus) # this is now redundant since each model doesn't deal with the light separatly
 
diff --git a/test/test_sediments.jl b/test/test_sediments.jl
index 73c7f940a..2a3b9cf30 100644
--- a/test/test_sediments.jl
+++ b/test/test_sediments.jl
@@ -112,31 +112,31 @@ display_name(::ImmersedBoundaryGrid) = "Immersed boundary grid"
 
 bottom_height(x, y) = -1000 + 500 * exp(- (x^2 + y^2) / 250) # a perfect hill
 
+# `arch` is set by runtests.jl (or by a wrapper such as gpu_runtests.jl).
+grids = [RectilinearGrid(arch; size=(3, 3, 50), extent=(10, 10, 500)),
+         LatitudeLongitudeGrid(arch; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
+         ImmersedBoundaryGrid(
+            LatitudeLongitudeGrid(arch; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
+            GridFittedBottom(bottom_height))]
+
 @testset "Sediment integration" begin
-    for architecture in (CPU(), )
-        grids = [RectilinearGrid(architecture; size=(3, 3, 50), extent=(10, 10, 500)),
-                 LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
-                 ImmersedBoundaryGrid(
-                    LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
-                    GridFittedBottom(bottom_height))]
-        for grid in grids
-            for timestepper in (:QuasiAdamsBashforth2, :RungeKutta3),
-                sediment_model in (InstantRemineralisation(; grid), SimpleMultiG(; grid)),
-                model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel)
-                for biogeochemistry in (NutrientPhytoplanktonZooplanktonDetritus(; grid, sediment_model),
-                                        LOBSTER(; grid,
-                                                  carbonates = ifelse(isa(sediment_model, SimpleMultiG), true, false),
-                                                  oxygen = ifelse(isa(sediment_model, SimpleMultiG), true, false),
-                                                  variable_redfield = ifelse(isa(sediment_model, SimpleMultiG), true, false),
-                                                  sediment_model))
-                    # get rid of incompatible combinations
-                    run = ifelse((model == NonhydrostaticModel && (isa(grid, ImmersedBoundaryGrid) || isa(grid, LatitudeLongitudeGrid))) ||
-                                 (model == HydrostaticFreeSurfaceModel && timestepper == :RungeKutta3) ||
-                                 (isa(sediment_model, SimpleMultiG) && isa(biogeochemistry.underlying_biogeochemistry, NutrientPhytoplanktonZooplanktonDetritus)), false, true)
-                    if run
-                        @info "Testing sediment on $(typeof(architecture)) with $timestepper and $(display_name(sediment_model)) on $(display_name(biogeochemistry.underlying_biogeochemistry))"
-                        @testset "$architecture, $timestepper, $(display_name(sediment_model)), $(display_name(biogeochemistry.underlying_biogeochemistry))" test_flat_sediment(grid, biogeochemistry, model; timestepper)
-                    end
+    for grid in grids
+        for timestepper in (:QuasiAdamsBashforth2, :RungeKutta3),
+            sediment_model in (InstantRemineralisation(; grid), SimpleMultiG(; grid)),
+            model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel)
+            for biogeochemistry in (NutrientPhytoplanktonZooplanktonDetritus(; grid, sediment_model),
+                                    LOBSTER(; grid,
+                                              carbonates = ifelse(isa(sediment_model, SimpleMultiG), true, false),
+                                              oxygen = ifelse(isa(sediment_model, SimpleMultiG), true, false),
+                                              variable_redfield = ifelse(isa(sediment_model, SimpleMultiG), true, false),
+                                              sediment_model))
+                # get rid of incompatible combinations
+                run = ifelse((model == NonhydrostaticModel && (isa(grid, ImmersedBoundaryGrid) || isa(grid, LatitudeLongitudeGrid))) ||
+                             (model == HydrostaticFreeSurfaceModel && timestepper == :RungeKutta3) ||
+                             (isa(sediment_model, SimpleMultiG) && isa(biogeochemistry.underlying_biogeochemistry, NutrientPhytoplanktonZooplanktonDetritus)), false, true)
+                if run
+                    @info "Testing sediment on $(typeof(arch)) with $timestepper and $(display_name(sediment_model)) on $(display_name(biogeochemistry.underlying_biogeochemistry))"
+                    @testset "$arch, $timestepper, $(display_name(sediment_model)), $(display_name(biogeochemistry.underlying_biogeochemistry))" test_flat_sediment(grid, biogeochemistry, model; timestepper)
                 end
             end
         end
diff --git a/test/test_slatissima.jl b/test/test_slatissima.jl
index ada407cd9..a4dae7774 100644
--- a/test/test_slatissima.jl
+++ b/test/test_slatissima.jl
@@ -10,7 +10,6 @@ function intercept_tendencies!(model, intercepted_tendencies)
 end
 
 @testset "SLatissima particle setup and conservations" begin
-    arch = CPU()
     grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 1))
 
     # Initial properties
diff --git a/test/test_utils.jl b/test/test_utils.jl
index 86586a791..6b970b8c7 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -46,9 +46,7 @@ function test_negative_zeroing(arch)
 end
 
 @testset "Test Utils" begin
-    for arch in (CPU(), )
-        @test test_column_diffusion_timescale(arch)
-        @test test_negative_scaling(arch)
-        @test test_negative_zeroing(arch)
-    end
+    @test test_column_diffusion_timescale(arch)
+    @test test_negative_scaling(arch)
+    @test test_negative_zeroing(arch)
 end