From 93a80533b176dd194259e3c702a716135a0ef929 Mon Sep 17 00:00:00 2001 From: Pavel Belevich Date: Wed, 22 May 2024 16:10:28 -0400 Subject: [PATCH] Fix llama_13b.toml -> llama2_13b.toml in multinode_trainer.slurm (#350) cc @wanchaol @lessw2020 @wconstab --- multinode_trainer.slurm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multinode_trainer.slurm b/multinode_trainer.slurm index 3f4a6acd..09b94ef1 100644 --- a/multinode_trainer.slurm +++ b/multinode_trainer.slurm @@ -54,7 +54,7 @@ export NCCL_BUFFSIZE=2097152 #export TORCH_DIST_INIT_BARRIER=1 export FI_EFA_SET_CUDA_SYNC_MEMOPS=0 #export USE_LIBUV=1 -CONFIG_FILE=${CONFIG_FILE:-"./train_configs/llama_13b.toml"} +CONFIG_FILE=${CONFIG_FILE:-"./train_configs/llama2_13b.toml"} dcgmi profile --pause # adjust sbatch --ntasks and sbatch --nodes above and --nnodes below