Skip to content

Commit

Permalink
start gefs on Azure
Browse files Browse the repository at this point in the history
  • Loading branch information
weihuang-jedi committed Aug 21, 2024
2 parents 35ae83e + 1b18f2f commit f1ac84b
Show file tree
Hide file tree
Showing 50 changed files with 936 additions and 669 deletions.
2 changes: 1 addition & 1 deletion ci/cases/pr/C48mx500_3DVarAOWCDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ arguments:
resdetocean: 5.0
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C48mx500
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C48mx500/20240610
idate: 2021032412
edate: 2021032418
nens: 0
Expand Down
2 changes: 1 addition & 1 deletion ci/cases/pr/C96C48_hybatmDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ arguments:
resensatmos: 48
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48/20240610
idate: 2021122018
edate: 2021122106
nens: 2
Expand Down
2 changes: 1 addition & 1 deletion ci/cases/pr/C96C48_ufs_hybatmDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ arguments:
resensatmos: 48
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48/20240610
idate: 2024022318
edate: 2024022400
nens: 2
Expand Down
2 changes: 1 addition & 1 deletion ci/cases/pr/C96_atm3DVar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ arguments:
resdetatmos: 96
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48/20240610
idate: 2021122018
edate: 2021122106
nens: 0
Expand Down
2 changes: 1 addition & 1 deletion ci/cases/pr/C96_atm3DVar_extended.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ arguments:
resdetatmos: 96
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48/20240610
idate: 2021122018
edate: 2021122118
nens: 0
Expand Down
2 changes: 1 addition & 1 deletion ci/cases/pr/C96_atmaerosnowDA.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ arguments:
resdetatmos: 96
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C96C48/20240610
idate: 2021122012
edate: 2021122100
nens: 0
Expand Down
10 changes: 5 additions & 5 deletions docs/source/init.rst
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,14 @@ Start date = 2021032312
├── enkfgdas.20210323
│   ├── 06
│   │   ├── mem001
│   │   │   └── model_data -> ../../../gdas.20210323/06/model_data
│   │   │   └── model -> ../../../gdas.20210323/06/model
│   │   ├── mem002
│   │   │   └── model_data -> ../../../gdas.20210323/06/model_data
│   │   │   └── model -> ../../../gdas.20210323/06/model
│   │   ├── mem003
│   │   │   └── model_data -> ../../../gdas.20210323/06/model_data
│   │   │   └── model -> ../../../gdas.20210323/06/model
...
│   │   └── mem080
│   │   └── model_data -> ../../../gdas.20210323/06/model_data
│   │   └── model -> ../../../gdas.20210323/06/model
│   └── 12
│   ├── mem001
│   │   └── analysis
Expand All @@ -153,7 +153,7 @@ Start date = 2021032312
│   └── gdas.t12z.ocninc.nc -> ../../../../../gdas.20210323/12/analysis/ocean/gdas.t12z.ocninc.nc
└── gdas.20210323
├── 06
│   └── model_data
│   └── model
│   ├── atmos
│   │   └── restart
│   │   ├── 20210323.120000.ca_data.tile1.nc
Expand Down
103 changes: 103 additions & 0 deletions env/AZUREPW.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#! /usr/bin/env bash

if [[ $# -ne 1 ]]; then

echo "Must specify an input argument to set runtime environment variables!"
exit 1

fi

step=$1

export launcher="srun -l --export=ALL"
export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"

# Configure MPI environment
export OMP_STACKSIZE=2048000
export NTHSTACK=1024000000

ulimit -s unlimited
ulimit -a

# Calculate common variables
# Check first if the dependent variables are set
if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:-}" ]]; then
max_threads_per_task=$((max_tasks_per_node / tasks_per_node))
NTHREADSmax=${threads_per_task:-${max_threads_per_task}}
NTHREADS1=${threads_per_task:-1}
[[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task}
[[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task}
export APRUN="${launcher} -n ${ntasks}"
else
echo "ERROR config.resources must be sourced before sourcing AZUREPW.env"
exit 2
fi

if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

export launcher="srun --mpi=pmi2 -l"

(( nnodes = (ntasks+tasks_per_node-1)/tasks_per_node ))
(( ufs_ntasks = nnodes*tasks_per_node ))
# With ESMF threading, the model wants to use the full node
export APRUN_UFS="${launcher} -n ${ufs_ntasks}"
unset nnodes ufs_ntasks

elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then

export CFP_MP="YES"
if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi
export wavempexec=${launcher}
export wave_mpmd=${mpmd_opt}

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
export APRUN_NP="${APRUN}"

export NTHREADS_DWN=${threads_per_task_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
export APRUN_DWN="${launcher} -n ${ntasks_dwn}"

elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation on Hera

elif [[ "${step}" = "oceanice_products" ]]; then

export NTHREADS_OCNICEPOST=${NTHREADS1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "ecen" ]]; then

export NTHREADS_ECEN=${NTHREADSmax}
export APRUN_ECEN="${APRUN}"

export NTHREADS_CHGRES=${threads_per_task_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${threads_per_task_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task}
export APRUN_CALCINC="${APRUN}"

elif [[ "${step}" = "esfc" ]]; then

export NTHREADS_ESFC=${NTHREADSmax}
export APRUN_ESFC="${APRUN}"

export NTHREADS_CYCLE=${threads_per_task_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node}
export APRUN_CYCLE="${APRUN}"

elif [[ "${step}" = "epos" ]]; then

export NTHREADS_EPOS=${NTHREADSmax}
export APRUN_EPOS="${APRUN}"

elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${NTHREADS1}
export MPIRUN="${APRUN}"

fi
17 changes: 9 additions & 8 deletions jobs/JGLOBAL_STAGE_IC
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
source "${HOMEgfs}/ush/preamble.sh"
source "${HOMEgfs}/ush/jjob_header.sh" -e "stage_ic" -c "base stage_ic"

# Restart conditions for GFS cycle come from GDAS
# shellcheck disable=SC2153
rCDUMP=${RUN}
# shellcheck disable=SC2153
[[ ${RUN} = "gfs" ]] && export rCDUMP="gdas"
export rCDUMP
# Execute staging
"${SCRgfs}/exglobal_stage_ic.py"
err=$?

# Execute the Script
"${SCRgfs}/exglobal_stage_ic.sh"
###############################################################
# Check for errors and exit if any of the above failed
if [[ "${err}" -ne 0 ]]; then
echo "FATAL ERROR: Unable to copy ICs to ${ROTDIR}; ABORT!"
exit "${err}"
fi

##########################################
# Remove the Temporary working directory
Expand Down
7 changes: 3 additions & 4 deletions parm/config/gefs/config.base
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ export FIXugwd=${FIXgfs}/ugwd
export PACKAGEROOT="@PACKAGEROOT@" # TODO: set via prod_envir in Ops
export COMROOT="@COMROOT@" # TODO: set via prod_envir in Ops
export COMINsyn="@COMINsyn@"
export BASE_CPLIC="@BASE_CPLIC@"

# USER specific paths
export HOMEDIR="@HOMEDIR@"
Expand Down Expand Up @@ -346,9 +345,9 @@ export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arc
# Number of regional collectives to create soundings for
export NUM_SND_COLLECTIVES=${NUM_SND_COLLECTIVES:-9}

# The tracker, genesis, and METplus jobs are not supported on AWS yet
# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS setup, not for general.
if [[ "${machine}" == "AWSPW" ]]; then
# The tracker, genesis, and METplus jobs are not supported on CSPs yet
# TODO: we should place these in workflow/hosts/[aws|azure|google]pw.yaml as part of CSP's setup, not for general.
if [[ "${machine}" =~ "PW" ]]; then
export DO_WAVE="NO"
fi

Expand Down
4 changes: 4 additions & 0 deletions parm/config/gefs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ case ${machine} in
export PARTITION_BATCH="compute"
max_tasks_per_node=36
;;
"AZUREPW")
export PARTITION_BATCH="compute"
max_tasks_per_node=24
;;
*)
echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}"
exit 2
Expand Down
11 changes: 11 additions & 0 deletions parm/config/gefs/config.resources.AZUREPW
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#! /usr/bin/env bash

# AZURE-specific job resources

export is_exclusive="True"
unset memory

# shellcheck disable=SC2312
for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
unset "${mem_var}"
done
49 changes: 22 additions & 27 deletions parm/config/gefs/config.stage_ic
Original file line number Diff line number Diff line change
Expand Up @@ -7,32 +7,27 @@ echo "BEGIN: config.stage_ic"
# Get task specific resources
source "${EXPDIR}/config.resources" stage_ic

case "${CASE}" in
"C384")
export CPL_ATMIC=""
export CPL_ICEIC=""
export CPL_OCNIC=""
export CPL_WAVIC=""
export CPL_MEDIC=""
;;
"C96")
export CPL_ATMIC=""
export CPL_ICEIC=""
export CPL_OCNIC=""
export CPL_WAVIC=""
export CPL_MEDIC=""
;;
"C48")
export CPL_ATMIC="gefs_test"
export CPL_ICEIC="gefs_test"
export CPL_OCNIC="gefs_test"
export CPL_WAVIC="gefs_test"
export CPL_MEDIC="gefs_test"
;;
*)
echo "FATAL ERROR Unrecognized resolution: ${CASE}"
exit 1
;;
esac
export ICSDIR="@ICSDIR@" # User provided ICSDIR; blank if not provided
export BASE_IC="@BASE_IC@" # Platform home for staged ICs

export STAGE_IC_YAML_TMPL="${PARMgfs}/stage/master_gefs.yaml.j2"

# Set ICSDIR

if [[ -z "${ICSDIR}" ]] ; then

ic_ver="20240610"

if (( NMEM_ENS > 0 )) ; then
ensic="${CASE_ENS}"
fi

if [[ "${DO_OCN:-NO}" == "YES" ]] ; then
ocnic="mx${OCNRES}"
fi

export ICSDIR="${BASE_IC}/${CASE}${ensic:-}${ocnic:-}/${ic_ver}"

fi

echo "END: config.stage_ic"
7 changes: 3 additions & 4 deletions parm/config/gfs/config.base
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ export PACKAGEROOT="@PACKAGEROOT@" # TODO: set via prod_envir in Ops
export COMROOT="@COMROOT@" # TODO: set via prod_envir in Ops
export COMINsyn="@COMINsyn@"
export DMPDIR="@DMPDIR@"
export BASE_CPLIC="@BASE_CPLIC@"

# Gempak from external models
# Default locations are to dummy locations for testing
Expand Down Expand Up @@ -483,9 +482,9 @@ export OFFSET_START_HOUR=0
# Number of regional collectives to create soundings for
export NUM_SND_COLLECTIVES=${NUM_SND_COLLECTIVES:-9}

# The tracker, genesis, and METplus jobs are not supported on AWS yet
# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS setup, not for general.
if [[ "${machine}" == "AWSPW" ]]; then
# The tracker, genesis, and METplus jobs are not supported on CSPs yet
# TODO: we should place these in workflow/hosts/awspw.yaml as part of AWS/AZURE setup, not for general.
if [[ "${machine}" =~ "PW" ]]; then
export DO_TRACKER="NO"
export DO_GENESIS="NO"
export DO_METP="NO"
Expand Down
Loading

0 comments on commit f1ac84b

Please sign in to comment.