Skip to content

Commit

Permalink
tried to make a slurm script
Browse files Browse the repository at this point in the history
  • Loading branch information
jagoosw committed Sep 8, 2023
1 parent 0f5bf5d commit ed8bc4c
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 51 deletions.
105 changes: 105 additions & 0 deletions .github/workflows/gpu-test.sbatch
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/bin/bash
#!
#! Example SLURM job script for Wilkes3 (AMD EPYC 7763, ConnectX-6, A100)
#! Last updated: Fri 30 Jul 11:07:58 BST 2021
#!

#!#############################################################
#!#### Modify the options in this section as appropriate ######
#!#############################################################

#! sbatch directives begin here ###############################
#! Name of the job:
#SBATCH -J gpujob
#! Which project should be charged (NB Wilkes2 projects end in '-GPU'):
#SBATCH -A taylor-sl3-gpu
#! How many whole nodes should be allocated?
#SBATCH --nodes=1
#! How many (MPI) tasks will there be in total?
#! Note probably this should not exceed the total number of GPUs in use.
#SBATCH --ntasks=1
#! Specify the number of GPUs per node (between 1 and 4; must be 4 if nodes>1).
#! Note that the job submission script will enforce no more than 32 cpus per GPU.
#SBATCH --gres=gpu:1
#! How much wallclock time will be required?
#SBATCH --time=02:00:00
#! What types of email messages do you wish to receive?
#SBATCH --mail-type=NONE
#! Uncomment this to prevent the job from being requeued (e.g. if
#! interrupted by node failure or system downtime):
##SBATCH --no-requeue

#! Do not change:
#SBATCH -p ampere

#! sbatch directives end here (put any additional directives above this line)

#! Notes:
#! Charging is determined by GPU number*walltime.

#! Number of nodes and tasks per node allocated by SLURM (do not change):
numnodes=$SLURM_JOB_NUM_NODES
numtasks=$SLURM_NTASKS
#! SLURM_TASKS_PER_NODE may carry a suffix after the leading count; keep only the leading digits.
mpi_tasks_per_node=$(echo "$SLURM_TASKS_PER_NODE" | sed -e 's/^\([0-9][0-9]*\).*$/\1/')
#! ############################################################
#! Modify the settings below to specify the application's environment, location
#! and launch method:

#! Optionally modify the environment seen by the application
#! (note that SLURM reproduces the environment at submission irrespective of ~/.bashrc):
. /etc/profile.d/modules.sh # Leave this line (enables the module command)
module purge # Removes all modules still loaded
module load rhel8/default-amp # REQUIRED - loads the basic environment

#! Insert additional module load commands after this line if needed:

#! Julia version to run. Because SLURM reproduces the submission environment, a
#! JULIA_VERSION exported by the submitting process (e.g. the CI workflow) is picked up
#! here; 1.9.3 is the fallback when it is not set.
julia_version="${JULIA_VERSION:-1.9.3}"

#! Full path to application executable:
application="/home/js2430/rds/hpc-work/OceanBioME-runner/_work/_temp/julia-${julia_version}/bin/julia -O0 --color=yes --project test/gpu_runtests.jl"

#! Run options for the application:
options=""

#! Work directory (i.e. where the job will run):
workdir="$SLURM_SUBMIT_DIR" # The value of SLURM_SUBMIT_DIR sets workdir to the directory
# in which sbatch is run.

#! Are you using OpenMP (NB this is unrelated to OpenMPI)? If so increase this
#! safe value to no more than 128:
export OMP_NUM_THREADS=1

#! Number of MPI tasks to be started by the application per node and in total (do not change):
#! (uses POSIX $(( )) arithmetic; the older $[ ] form is deprecated in bash)
np=$(( numnodes * mpi_tasks_per_node ))

#! Choose this for a pure shared-memory OpenMP parallel program on a single node:
#! (OMP_NUM_THREADS threads will be created):
CMD="$application $options"


###############################################################
### You should not have to change anything below this line ####
###############################################################

#! Move to the work directory; abort rather than run the command somewhere unexpected.
cd "$workdir" || { echo "Unable to change directory to '$workdir'." >&2; exit 1; }
echo -e "Changed directory to $(pwd).\n"

JOBID=$SLURM_JOB_ID

echo -e "JobID: $JOBID\n======"
echo "Time: $(date)"
echo "Running on master node: $(hostname)"
echo "Current directory: $(pwd)"

if [ "$SLURM_JOB_NODELIST" ]; then
    #! Create a machine file:
    #! (assign first, then export, so a failure of generate_pbs_nodefile is not masked)
    NODEFILE=$(generate_pbs_nodefile)
    export NODEFILE
    uniq "$NODEFILE" > "machine.file.$JOBID"
    echo -e "\nNodes allocated:\n================"
    #! The command substitution is deliberately unquoted so that the host names
    #! (domain suffix stripped) are printed on a single line.
    echo $(sed -e 's/\..*$//g' "machine.file.$JOBID")
fi

echo -e "\nnumtasks=$numtasks, numnodes=$numnodes, mpi_tasks_per_node=$mpi_tasks_per_node (OMP_NUM_THREADS=$OMP_NUM_THREADS)"

echo -e "\nExecuting command:\n==================\n$CMD\n"

#! This is the last command, so its exit status becomes the exit status of the job script.
eval "$CMD"
35 changes: 35 additions & 0 deletions .github/workflows/gpu-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Runs the GPU test suite on CSD3 by submitting a SLURM job from a self-hosted runner.
name: CSD3 tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

env:
  DATADEPS_ALWAYS_ACCEPT: true
  JULIA_VERSION: "1.9.3"
  JULIA_MINOR_VERSION: "1.9"
  RDS_HOME: "/home/js2430/rds/hpc-work/OceanBioME-runner/_work"

jobs:
  test:
    name: Deploy CSD3 tests
    runs-on: self-hosted
    steps:
      # The batch script and the tests live in the repository, so it must be checked out first.
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Initialize GPU environment
        run: |
          wget -N -P "$RDS_HOME/_temp" "https://julialang-s3.julialang.org/bin/linux/x64/$JULIA_MINOR_VERSION/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
          tar xf "$RDS_HOME/_temp/julia-$JULIA_VERSION-linux-x86_64.tar.gz" -C "$RDS_HOME/_temp"

      # --wait blocks until the job terminates and makes sbatch exit with the job's exit
      # code, so this step fails when the tests fail and the clean-up below cannot remove
      # Julia while the job is still queued or running.
      - name: Run the tests on SLURM
        run: sbatch --wait .github/workflows/gpu-test.sbatch

      # Remove the unpacked Julia installation even when an earlier step failed.
      - name: Clean up
        if: always()
        run: rm -rf "$RDS_HOME/_temp/julia-$JULIA_VERSION"
13 changes: 13 additions & 0 deletions test/gpu_runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Entry point for running the test suite on a GPU: prepares the package environment,
# selects the GPU architecture and then hands over to the regular test runner.
using Pkg

Pkg.instantiate()

# `Pkg.add` takes a single package name or a vector of names (not several positional
# strings), so the extra test dependencies are passed as one vector.
Pkg.add(["Test", "CUDA", "DataDeps", "Documenter", "Statistics", "JLD2"])

Pkg.precompile()

using Oceananigans

# `runtests.jl` only falls back to `CPU()` when `arch` is not already defined, so
# defining it here makes the included tests run on the GPU.
arch = GPU()

include("runtests.jl")
12 changes: 9 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
using OceanBioME, Documenter, Test

# Fall back to the CPU architecture unless the including script (for example a GPU
# entry point) has already defined `arch`.
@isdefined(arch) || (arch = CPU())

include("test_utils.jl")
include("test_light.jl")
include("test_LOBSTER.jl")
Expand All @@ -8,6 +12,8 @@ include("test_gasexchange.jl")
include("test_slatissima.jl")
include("test_sediments.jl")

@testset "Doctests" begin
doctest(OceanBioME)
end
# The doctests are only run when the suite is testing the CPU architecture.
if arch isa CPU
    @testset "Doctests" begin
        doctest(OceanBioME)
    end
end
15 changes: 7 additions & 8 deletions test/test_LOBSTER.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,13 @@ end

n_timesteps = 100

for arch in (CPU(), )
grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 2))
for open_bottom = (false, true), sinking = (false, true), variable_redfield = (false, true), oxygen = (false, true), carbonates = (false, true)
if !(sinking && open_bottom) # no sinking is the same with and without open bottom
@info "Testing on $(typeof(arch)) with carbonates $(carbonates ? :✅ : :❌), oxygen $(oxygen ? :✅ : :❌), variable redfield $(variable_redfield ? :✅ : :❌), sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
@testset "$arch, $carbonates, $oxygen, $variable_redfield, $sinking, $open_bottom" begin
test_LOBSTER(grid, carbonates, oxygen, variable_redfield, sinking, open_bottom, n_timesteps)
end
grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 2))

for open_bottom = (false, true), sinking = (false, true), variable_redfield = (false, true), oxygen = (false, true), carbonates = (false, true)
if !(sinking && open_bottom) # no sinking is the same with and without open bottom
@info "Testing on $(typeof(arch)) with carbonates $(carbonates ? :✅ : :❌), oxygen $(oxygen ? :✅ : :❌), variable redfield $(variable_redfield ? :✅ : :❌), sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
@testset "$arch, $carbonates, $oxygen, $variable_redfield, $sinking, $open_bottom" begin
test_LOBSTER(grid, carbonates, oxygen, variable_redfield, sinking, open_bottom, n_timesteps)
end
end
end
15 changes: 7 additions & 8 deletions test/test_NPZD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,13 @@ function test_NPZD(grid, sinking, open_bottom)
return nothing
end

for arch in (CPU(), )
grid = RectilinearGrid(arch; size=(3, 3, 6), extent=(1, 1, 2))
for sinking = (false, true), open_bottom = (false, true)
if !(sinking && open_bottom) # no sinking is the same with and without open bottom
@info "Testing on $(typeof(arch)) with sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
@testset "$arch, $sinking, $open_bottom" begin
test_NPZD(grid, sinking, open_bottom)
end
grid = RectilinearGrid(arch; size=(3, 3, 6), extent=(1, 1, 2))

for sinking = (false, true), open_bottom = (false, true)
if !(sinking && open_bottom) # no sinking is the same with and without open bottom
@info "Testing on $(typeof(arch)) with sinking $(sinking ? :✅ : :❌), open bottom $(open_bottom ? :✅ : :❌))"
@testset "$arch, $sinking, $open_bottom" begin
test_NPZD(grid, sinking, open_bottom)
end
end
end
2 changes: 0 additions & 2 deletions test/test_light.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,9 @@ function test_two_band(grid, bgc, model_type)
return all(results_PAR .≈ reverse(expected_PAR))
end

archs = (CPU(), )

@testset "Light attenuaiton model" begin
for model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel),
arch in archs,
grid in (RectilinearGrid(arch; size = (2, 2, 2), extent = (2, 2, 2)),
LatitudeLongitudeGrid(arch; size = (5, 5, 2), longitude = (-180, 180), latitude = (-85, 85), z = (-2, 0))),
bgc in (LOBSTER, NutrientPhytoplanktonZooplanktonDetritus) # this is now redundant since each model doesn't deal with the light separatly
Expand Down
47 changes: 23 additions & 24 deletions test/test_sediments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,31 +112,30 @@ display_name(::ImmersedBoundaryGrid) = "Immersed boundary grid"

bottom_height(x, y) = -1000 + 500 * exp(- (x^2 + y^2) / 250) # a perfect hill

# NOTE(review): the `for architecture in (CPU(), )` loop that used to bind `architecture`
# is removed in this change, and the rest of the suite now uses the global `arch` set by
# the test entry point. Alias it here so the grids below and the later
# `$(typeof(architecture))` / `$architecture` interpolations keep working - confirm that
# no earlier part of this file defines `architecture` differently.
architecture = arch

# Grids the sediment models are tested on: a rectilinear grid, a latitude-longitude grid,
# and a latitude-longitude grid wrapped in an immersed boundary built from `bottom_height`.
grids = [RectilinearGrid(architecture; size=(3, 3, 50), extent=(10, 10, 500)),
         LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
         ImmersedBoundaryGrid(
             LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
             GridFittedBottom(bottom_height))]

@testset "Sediment integration" begin
for architecture in (CPU(), )
grids = [RectilinearGrid(architecture; size=(3, 3, 50), extent=(10, 10, 500)),
LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
ImmersedBoundaryGrid(
LatitudeLongitudeGrid(architecture; size = (3, 3, 16), latitude = (0, 10), longitude = (0, 10), z = (-500, 0)),
GridFittedBottom(bottom_height))]
for grid in grids
for timestepper in (:QuasiAdamsBashforth2, :RungeKutta3),
sediment_model in (InstantRemineralisation(; grid), SimpleMultiG(; grid)),
model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel)
for biogeochemistry in (NutrientPhytoplanktonZooplanktonDetritus(; grid, sediment_model),
LOBSTER(; grid,
carbonates = ifelse(isa(sediment_model, SimpleMultiG), true, false),
oxygen = ifelse(isa(sediment_model, SimpleMultiG), true, false),
variable_redfield = ifelse(isa(sediment_model, SimpleMultiG), true, false),
sediment_model))
# get rid of incompatible combinations
run = ifelse((model == NonhydrostaticModel && (isa(grid, ImmersedBoundaryGrid) || isa(grid, LatitudeLongitudeGrid))) ||
(model == HydrostaticFreeSurfaceModel && timestepper == :RungeKutta3) ||
(isa(sediment_model, SimpleMultiG) && isa(biogeochemistry.underlying_biogeochemistry, NutrientPhytoplanktonZooplanktonDetritus)), false, true)
if run
@info "Testing sediment on $(typeof(architecture)) with $timestepper and $(display_name(sediment_model)) on $(display_name(biogeochemistry.underlying_biogeochemistry))"
@testset "$architecture, $timestepper, $(display_name(sediment_model)), $(display_name(biogeochemistry.underlying_biogeochemistry))" test_flat_sediment(grid, biogeochemistry, model; timestepper)
end
for grid in grids
for timestepper in (:QuasiAdamsBashforth2, :RungeKutta3),
sediment_model in (InstantRemineralisation(; grid), SimpleMultiG(; grid)),
model in (NonhydrostaticModel, HydrostaticFreeSurfaceModel)
for biogeochemistry in (NutrientPhytoplanktonZooplanktonDetritus(; grid, sediment_model),
LOBSTER(; grid,
carbonates = ifelse(isa(sediment_model, SimpleMultiG), true, false),
oxygen = ifelse(isa(sediment_model, SimpleMultiG), true, false),
variable_redfield = ifelse(isa(sediment_model, SimpleMultiG), true, false),
sediment_model))
# get rid of incompatible combinations
run = ifelse((model == NonhydrostaticModel && (isa(grid, ImmersedBoundaryGrid) || isa(grid, LatitudeLongitudeGrid))) ||
(model == HydrostaticFreeSurfaceModel && timestepper == :RungeKutta3) ||
(isa(sediment_model, SimpleMultiG) && isa(biogeochemistry.underlying_biogeochemistry, NutrientPhytoplanktonZooplanktonDetritus)), false, true)
if run
@info "Testing sediment on $(typeof(architecture)) with $timestepper and $(display_name(sediment_model)) on $(display_name(biogeochemistry.underlying_biogeochemistry))"
@testset "$architecture, $timestepper, $(display_name(sediment_model)), $(display_name(biogeochemistry.underlying_biogeochemistry))" test_flat_sediment(grid, biogeochemistry, model; timestepper)
end
end
end
Expand Down
1 change: 0 additions & 1 deletion test/test_slatissima.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ function intercept_tendencies!(model, intercepted_tendencies)
end

@testset "SLatissima particle setup and conservations" begin
arch = CPU()
grid = RectilinearGrid(arch; size=(1, 1, 1), extent=(1, 1, 1))

# Initial properties
Expand Down
8 changes: 3 additions & 5 deletions test/test_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ function test_negative_zeroing(arch)
end

@testset "Test Utils" begin
for arch in (CPU(), )
@test test_column_diffusion_timescale(arch)
@test test_negative_scaling(arch)
@test test_negative_zeroing(arch)
end
@test test_column_diffusion_timescale(arch)
@test test_negative_scaling(arch)
@test test_negative_zeroing(arch)
end

0 comments on commit ed8bc4c

Please sign in to comment.