Skip to content

Commit

Permalink
Retry dnf commands + always install dev tools (#5358)
Browse files Browse the repository at this point in the history
  • Loading branch information
jeanschmidt authored Jun 19, 2024
1 parent da14b33 commit c266a95
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 40 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set -x
set -euxo pipefail

install_hooks() {
pushd /home/$USER_NAME
Expand Down
Original file line number Diff line number Diff line change
@@ -1,78 +1,93 @@
#!/bin/bash -xe
set -x
#!/bin/bash

set -euxo pipefail

exec > >(tee /var/log/user-data.log | logger -t user-data -s 2>/dev/console) 2>&1

# Build an OS identifier: source /etc/os-release inside the command
# substitution's subshell and print its ID and VERSION_ID back to back.
OS_ID=$(. /etc/os-release;echo $ID$VERSION_ID)
# Pick the package manager: dnf when OS_ID matches the pattern below, yum otherwise.
# NOTE(review): =~ performs a regex match, so the trailing `*` quantifies the
# preceding `3` (zero or more) instead of acting as a glob wildcard. As written
# the pattern matches any OS_ID that begins with "amzn202" - confirm that this
# is the intended set of releases.
if [[ "$OS_ID" =~ ^amzn2023* ]]; then
PKG_MANAGER="dnf"
else
PKG_MANAGER="yum"
fi

${pre_install}

sudo yum update -y
if ! command -v curl 2>/dev/null; then
echo "Installing curl"
sudo $PKG_MANAGER install -y curl
fi

# Fetch the `retry` helper that the package-manager calls below are wrapped in.
# -f makes curl exit non-zero on an HTTP error instead of saving the error page
# as the script, so a failed download stops here (the && skips chmod) rather
# than surfacing later as a broken `retry`. -sS hides the progress meter but
# keeps error messages; -L follows redirects.
# NOTE(review): this installs an unpinned script from the upstream master
# branch and later runs it via sudo; consider pinning to a commit or vendoring.
sudo sh -c "curl -fsSL https://raw.githubusercontent.com/kadwanev/retry/master/retry -o /usr/local/bin/retry && chmod +x /usr/local/bin/retry"

sudo retry "$PKG_MANAGER update -y"

if ! command -v jq 2>/dev/null; then
echo "Installing jq"
sudo retry "$PKG_MANAGER install -y jq"
fi
if ! command -v git 2>/dev/null; then
echo "Installing git"
sudo retry "$PKG_MANAGER install -y git"
fi
# Install pip when pip3 is not already on PATH.
# The log line names the package actually being installed (it previously
# said "git", copied from the check above).
if ! command -v pip3 2>/dev/null; then
  echo "Installing pip"
  sudo retry "$PKG_MANAGER install -y pip"
fi

%{ if enable_cloudwatch_agent ~}
sudo yum install amazon-cloudwatch-agent -y
sudo retry "$PKG_MANAGER install amazon-cloudwatch-agent -y"
amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c ssm:${ssm_key_cloudwatch_agent_config}
%{ endif ~}

# Install docker
if [ "$(uname -m)" == "aarch64" ]; then
sudo yum install -y docker
sudo retry "$PKG_MANAGER install -y docker"
else
if command -v amazon-linux-extras 2>/dev/null; then
echo "Installing docker using amazon-linux-extras"
sudo amazon-linux-extras install docker
sudo retry "amazon-linux-extras install docker"
else
echo "Installing docker using dnf"
sudo dnf install docker -y
sudo retry "dnf install docker -y"
fi
fi

service docker start
usermod -a -G docker ec2-user

if ! command -v curl 2>/dev/null; then
echo "Installing curl"
sudo yum install -y curl
fi
if ! command -v jq 2>/dev/null; then
echo "Installing jq"
sudo yum install -y jq
fi
if ! command -v git 2>/dev/null; then
echo "Installing git"
sudo yum install -y git
fi
if ! command -v pip3 2>/dev/null; then
echo "Installing git"
sudo yum install -y pip
fi

USER_NAME=ec2-user
${install_config_runner}

sudo retry "$PKG_MANAGER groupinstall -y 'Development Tools'"
sudo retry "$PKG_MANAGER install -y 'kernel-devel-uname-r == $(uname -r)'"

echo Checking if nvidia install required ${nvidia_driver_install}
%{ if nvidia_driver_install ~}
set +e

os_id=$(. /etc/os-release;echo $ID$VERSION_ID)
if [[ "$os_id" =~ ^amzn.* ]]; then
if [[ "$os_id" =~ "amzn2023" ]] ; then
echo "NVIDIA driver install required"
if [[ "$OS_ID" =~ ^amzn.* ]]; then
if [[ "$OS_ID" =~ "amzn2023" ]] ; then
echo "On Amazon Linux 2023, installing kernel-modules-extra"
sudo dnf install kernel-modules-extra -y
sudo retry "dnf install kernel-modules-extra -y"
fi
echo Installing Development Tools
sudo yum groupinstall -y "Development Tools"
sudo yum install -y "kernel-devel-uname-r == $(uname -r)"
sudo modprobe backlight
fi
sudo curl -fsL -o /tmp/nvidia_driver "https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-550.54.15.run"
sudo /bin/bash /tmp/nvidia_driver -s --no-drm
sudo retry "curl -fsL -o /tmp/nvidia_driver 'https://s3.amazonaws.com/ossci-linux/nvidia_driver/NVIDIA-Linux-x86_64-550.54.15.run'"
sudo retry "/bin/bash /tmp/nvidia_driver -s --no-drm"
sudo rm -fv /tmp/nvidia_driver
if [[ "$os_id" =~ ^amzn.* ]]; then
if [[ "$OS_ID" =~ ^amzn.* ]]; then
# Register the NVIDIA container toolkit repo with the matching tooling:
# dnf + dnf-plugins-core on Amazon Linux 2023, yum + yum-utils otherwise.
# Inside [[ ]], == does glob matching, where ^ is a literal character, so the
# pattern must not carry a regex anchor; with the anchor the dnf branch could
# never be selected.
if [[ "$OS_ID" == amzn2023* ]]; then
  sudo retry "dnf install -y dnf-plugins-core"
  sudo retry "dnf config-manager --add-repo 'https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo'"
else
  sudo retry "yum install -y yum-utils"
  sudo retry "yum-config-manager --add-repo 'https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo'"
fi
echo Installing nvidia-docker tools
sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo
sudo yum install -y nvidia-docker2
sudo retry "$PKG_MANAGER install -y nvidia-docker2"
sudo systemctl restart docker
fi
set -e
%{ endif ~}

${post_install}
Expand Down

0 comments on commit c266a95

Please sign in to comment.