Skip to content

Commit

Permalink
Add nvidia-fabricmanager to dstack VM images
Browse files (browse the repository at this point in the history)
  • Loading branch information
jvstme committed Jul 2, 2024
1 parent 88d0936 commit adf7c8d
Show file tree
Hide file tree
Showing 14 changed files with 23 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,14 @@ jobs:
run: |
pip install .[oci]
- name: Copy image to target regions
if: ${{ !inputs.staging }}
run: |
python scripts/oci_image_tools.py copy \
--image ${BUILD_PREFIX}dstack${{ matrix.variant }}-${{ inputs.image_version }} \
--from $OCI_REGION \
--compartment $OCI_COMPARTMENT
- name: Publish image in OCI Marketplace
if: ${{ !inputs.staging }}
run: |
python scripts/oci_image_tools.py publish \
--image ${BUILD_PREFIX}dstack${{ matrix.variant }}-${{ inputs.image_version }} \
Expand Down
4 changes: 2 additions & 2 deletions scripts/packer/aws-image-cuda.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"base_ami": "ami-0cffefff2d52e0a23",
"instance_type": "c5.large",
"subnet_id": "subnet-c39cb6a5",
"docker_version": "20.10.17",
"cuda_drivers_version": "535.54.03-1",
"docker_version": "",
"cuda_drivers_version": "",
"build_prefix": "",
"ami_regions": "",
"ami_groups": "",
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/aws-image.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"base_ami": "ami-0cffefff2d52e0a23",
"instance_type": "c5.large",
"subnet_id": "subnet-c39cb6a5",
"docker_version": "20.10.17",
"docker_version": "",
"build_prefix": "",
"ami_regions": "",
"ami_groups": "",
Expand Down
4 changes: 2 additions & 2 deletions scripts/packer/azure-image-cuda.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
"azure_location": "westeurope",
"azure_vm_size": "Standard_DS1_v2",
"build_prefix": "",
"docker_version": "20.10.17",
"cuda_drivers_version": "535.54.03-1",
"docker_version": "",
"cuda_drivers_version": "",
"image_version": ""
},
"builders": [{
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/azure-image-grid.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"azure_location": "westeurope",
"azure_vm_size": "Standard_DS1_v2",
"build_prefix": "",
"docker_version": "20.10.17",
"docker_version": "",
"image_version": ""
},
"builders": [{
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/azure-image.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"azure_location": "westeurope",
"azure_vm_size": "Standard_DS1_v2",
"build_prefix": "",
"docker_version": "20.10.17",
"docker_version": "",
"image_version": ""
},
"builders": [{
Expand Down
4 changes: 2 additions & 2 deletions scripts/packer/gcp-image-cuda.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"variables": {
"build_prefix": "",
"docker_version": "20.10.17",
"cuda_drivers_version": "535.54.03-1",
"docker_version": "",
"cuda_drivers_version": "",
"image_version": ""
},
"builders": [
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/gcp-image.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"variables": {
"build_prefix": "",
"docker_version": "20.10.17",
"docker_version": "",
"image_version": ""
},
"builders": [
Expand Down
4 changes: 2 additions & 2 deletions scripts/packer/locals.pkr.hcl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
locals {
clean_image_version = regex_replace(var.image_version, "[^a-z0-9-]", "-")
image_name = "${var.build_prefix}dstack-${local.clean_image_version}"
docker_version = "20.10.17"
cuda_drivers_version = "535.54.03-1"
docker_version = ""
cuda_drivers_version = ""
}
3 changes: 2 additions & 1 deletion scripts/packer/oci-image-cuda.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{
"variables": {
"build_prefix": "",
"docker_version": "20.10.17",
"docker_version": "",
"cuda_drivers_version": "",
"image_version": "",
"oci_availability_domain": "",
"oci_compartment_ocid": "",
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/oci-image.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"variables": {
"build_prefix": "",
"docker_version": "20.10.17",
"docker_version": "",
"image_version": "",
"oci_availability_domain": "",
"oci_compartment_ocid": "",
Expand Down
5 changes: 4 additions & 1 deletion scripts/packer/provisioners/cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@ rm cuda-keyring_1.0-1_all.deb

CUDA_BRANCH=$(cut -d '.' -f 1 <<< "$CUDA_DRIVERS_VERSION")
sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cuda-drivers-$CUDA_BRANCH=$CUDA_DRIVERS_VERSION
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
cuda-drivers-$CUDA_BRANCH=$CUDA_DRIVERS_VERSION \
nvidia-fabricmanager-$CUDA_BRANCH=$CUDA_DRIVERS_VERSION
sudo systemctl enable nvidia-fabricmanager
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ sudo apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install build-essential linux-azure -y

wget --no-verbose -O NVIDIA-Linux-x86_64-grid.run \
https://download.microsoft.com/download/1/4/4/14450d0e-a3f2-4b0a-9bb4-a8e729e986c4/NVIDIA-Linux-x86_64-535.154.05-grid-azure.run
https://download.microsoft.com/download/8/d/a/8da4fb8e-3a9b-4e6a-bc9a-72ff64d7a13c/NVIDIA-Linux-x86_64-535.161.08-grid-azure.run
chmod +x NVIDIA-Linux-x86_64-grid.run
sudo ./NVIDIA-Linux-x86_64-grid.run --silent --disable-nouveau
rm NVIDIA-Linux-x86_64-grid.run
Expand Down
2 changes: 1 addition & 1 deletion scripts/packer/versions.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"docker_version": "20.10.17",
"cuda_drivers_version": "535.54.03-1"
"cuda_drivers_version": "535.183.01-1"
}

0 comments on commit adf7c8d

Please sign in to comment.