From 85667a51dacb4729f10577dd06e5f545e93c4b59 Mon Sep 17 00:00:00 2001 From: Seth Zegelstein Date: Tue, 3 Sep 2024 18:14:09 +0000 Subject: [PATCH] .ci/aws: Unpin al2 p3dn ami The al2 p3dn AMI was pinned to an AMI created manually by running PortaFiducia's setup_instances.py on a p3dn.24xlarge. In order to keep up to date with OS security updates, EC2 Image Builder is used to create updated AMIs daily. Because p3dn is in short supply, EC2 Image Builder uses a g3 instance to generate the p3dn AMI. The AMI building script worked correctly when run on p3dn. The same script, when run on g3dn, would complete without errors but would not generate an AMI with CUDA installed correctly (nvidia-smi wouldn't work). Recently, we were able to fix the bug in how we were installing CUDA in our AMI Builder, which allows us to unpin the p3dn AMI and use the newest AMI with the latest security fixes. Signed-off-by: Seth Zegelstein --- .ci/aws/Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.ci/aws/Jenkinsfile b/.ci/aws/Jenkinsfile index 8a5bed33e..83feca9d8 100644 --- a/.ci/aws/Jenkinsfile +++ b/.ci/aws/Jenkinsfile @@ -207,7 +207,6 @@ pipeline { def p3dn_region = "ap-northeast-1" def p3dn_odcr = "cr-08ecd03c0644442e4" def p3dn_addl_args = "${base_args} --odcr-placement-group-name p3dn-placement-group" - def p3dn_al2_addl_args = "${p3dn_addl_args} --ami-id ami-0b9081c7bc36c4eba" def p4d_lock_label = "p4d-1-4node" def p4d_region = "us-east-2" def p4d_odcr = "cr-0e5eebb3c896f6af0" @@ -221,7 +220,7 @@ pipeline { def g4dn_addl_args = "${base_args} --odcr-placement-group-name g4dn-placement-group" // p3dn tests - stages["4_p3dn_al2"] = get_test_stage_with_lock("4_p3dn_al2", env.BUILD_TAG, "alinux2", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_al2_addl_args) + stages["4_p3dn_al2"] = get_test_stage_with_lock("4_p3dn_al2", env.BUILD_TAG, "alinux2", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, 
p3dn_odcr, p3dn_addl_args) stages["4_p3dn_ubuntu2004"] = get_test_stage_with_lock("4_p3dn_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_addl_args) stages["4_p3dn_ubuntu2204"] = get_test_stage_with_lock("4_p3dn_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_addl_args)