Skip to content

Commit

Permalink
.ci/aws: Pin p4/p5 AMIs to the AMIs from 8/7/24
Browse files Browse the repository at this point in the history
To try to stabilize the aws-ofi-nccl plugin GH PR CI, the plan is to
pin the AMIs to the ones built on August 7th, before we started running
into a bunch of CUDA-version-related issues.  Once those issues are
fixed, we will unpin the AMIs.

Signed-off-by: Seth Zegelstein <[email protected]>
  • Loading branch information
a-szegel committed Sep 5, 2024
1 parent 7c03145 commit 77d8cbd
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions .ci/aws/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -219,21 +219,29 @@ pipeline {
def g4dn_region = "us-west-2"
def g4dn_odcr = "cr-0e2f9cac30bb5ad5f"
def g4dn_addl_args = "${base_args} --odcr-placement-group-name g4dn-placement-group"
// Pin p4/p5 to the AMIs from August 7th until we figure out why ImageBuilder is broken.
// p4 and p5 run in different regions, so each needs its own set of AMI IDs.
def p4_al2_ami = " --ami-id ami-0325055f791f59e7b"
def p4_ub2004_ami = " --ami-id ami-05feaa67734032ae8"
def p4_ub2204_ami = " --ami-id ami-06af5c08a83958af0"
def p5_al2_ami = " --ami-id ami-0c7d6c9eddde7c8cd"
def p5_ub2004_ami = " --ami-id ami-0945f264a4dc5bbb8"
def p5_ub2204_ami = " --ami-id ami-0c97f93421701b894"

// p3dn tests
stages["4_p3dn_al2"] = get_test_stage_with_lock("4_p3dn_al2", env.BUILD_TAG, "alinux2", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_al2_addl_args)
stages["4_p3dn_ubuntu2004"] = get_test_stage_with_lock("4_p3dn_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_addl_args)
stages["4_p3dn_ubuntu2204"] = get_test_stage_with_lock("4_p3dn_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p3dn.24xlarge", p3dn_region, p3dn_lock_label, num_instances, p3dn_odcr, p3dn_addl_args)

// p4d tests
stages["4_p4d_alinux2"] = get_test_stage_with_lock("4_p4d_alinux2", env.BUILD_TAG, "alinux2", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args)
stages["4_p4d_ubuntu2004"] = get_test_stage_with_lock("4_p4d_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args)
stages["4_p4d_ubuntu2204"] = get_test_stage_with_lock("4_p4d_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args)
stages["4_p4d_alinux2"] = get_test_stage_with_lock("4_p4d_alinux2", env.BUILD_TAG, "alinux2", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args + p4_al2_ami)
stages["4_p4d_ubuntu2004"] = get_test_stage_with_lock("4_p4d_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args + p4_ub2004_ami)
stages["4_p4d_ubuntu2204"] = get_test_stage_with_lock("4_p4d_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p4d.24xlarge", p4d_region, p4d_lock_label, num_instances, p4d_odcr, p4_p5_addl_args + p4_ub2204_ami)

// p5 tests
stages["4_p5_alinux2"] = get_test_stage_with_lock("4_p5_alinux2", env.BUILD_TAG, "alinux2", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args)
stages["4_p5_ubuntu2004"] = get_test_stage_with_lock("4_p5_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args)
stages["4_p5_ubuntu2204"] = get_test_stage_with_lock("4_p5_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args)
stages["4_p5_alinux2"] = get_test_stage_with_lock("4_p5_alinux2", env.BUILD_TAG, "alinux2", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args + p5_al2_ami)
stages["4_p5_ubuntu2004"] = get_test_stage_with_lock("4_p5_ubuntu2004", env.BUILD_TAG, "ubuntu2004", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args + p5_ub2004_ami)
stages["4_p5_ubuntu2204"] = get_test_stage_with_lock("4_p5_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "p5.48xlarge", p5_region, p5_lock_label, num_instances, p5_odcr, p4_p5_addl_args + p5_ub2204_ami)

// g4dn tests
stages["4_g4dn_ubuntu2204"] = get_test_stage_with_lock("4_g4dn_ubuntu2204", env.BUILD_TAG, "ubuntu2204", "g4dn.12xlarge", g4dn_region, g4dn_lock_label, num_instances, g4dn_odcr, g4dn_addl_args)
Expand Down

0 comments on commit 77d8cbd

Please sign in to comment.