# [v1.12.x-aws] .ci/aws: Get rid of ODCR/Region/PlacementGroup from Jen… #74
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow-level settings: display name, triggers, shared environment and
# the concurrency policy. The job definitions follow under `jobs:`.
name: PR CI

# Run on every push and on every pull request event.
on:
  - push
  - pull_request

env:
  # Space-separated list of apt packages; `>-` folds the lines below into a
  # single line with no trailing newline. Consumed via `env.APT_PACKAGES`
  # by the "Install Base Dependencies" steps.
  APT_PACKAGES: >-
    build-essential
    git
    libhwloc-dev
    make
  # Related to the actions/checkout@v4 issue that is linked next to the
  # checkout step of the amazonlinux job. This environment variable is also
  # needed as of July 2024.
  # ref: https://github.com/actions/runner/issues/2906#issuecomment-2208546951
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: 'true'

# Runs are grouped by the PR head ref when there is one, otherwise by the
# unique run id; an in-progress run in the same group is cancelled when a
# newer one starts.
concurrency:
  group: ${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
# Job `amazonlinux` (an entry of the workflow's `jobs:` map).
# Builds and installs the plugin inside Amazon Linux containers, once per
# container image and EFA installer version listed in the matrix.
amazonlinux:
  strategy:
    matrix:
      sdk:
        - cuda
      container:
        - public.ecr.aws/amazonlinux/amazonlinux:2023
        - public.ecr.aws/amazonlinux/amazonlinux:2
      # Version strings are quoted so they can never be re-typed by a parser.
      efainstaller:
        - latest
        - '1.32.0'
        - '1.31.0'
        - '1.30.0'
      # Per-container settings merged into every combination that uses the
      # matching `container` value.
      include:
        - container: public.ecr.aws/amazonlinux/amazonlinux:2023
          displayname: al2023
          efainstallerdir: ALINUX2023
          nvidiadistro: amzn2023
          configmanager: dnf config-manager
          cudapackages: cuda-cudart-devel-12-5 cuda-driver-devel-12-5
        - container: public.ecr.aws/amazonlinux/amazonlinux:2
          displayname: al2
          efainstallerdir: ALINUX2
          nvidiadistro: rhel7
          configmanager: yum-config-manager
          cudapackages: cuda-cudart-devel-12-4 cuda-driver-devel-12-4
  runs-on: ubuntu-latest
  container: ${{ matrix.container }}
  name: ${{ matrix.displayname }}/${{ matrix.sdk }}/efa@${{ matrix.efainstaller }}/makeinstall
  steps:
    # Tools required before the repository can be checked out and the EFA
    # tarball unpacked.
    - run: |
        yum -y update && yum -y install git tar util-linux findutils yum-utils
    # note, do not bump to v4: https://github.com/actions/checkout/issues/1590
    - uses: actions/checkout@v3
    - name: Fetch and Install EFA Installer Dependencies
      run: |
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page, which would only surface later as a tar failure.
        curl --fail -O https://efa-installer.amazonaws.com/aws-efa-installer-${{ matrix.efainstaller }}.tar.gz
        tar -xf aws-efa-installer-*.tar.gz
        cd aws-efa-installer/RPMS/${{ matrix.efainstallerdir }}/x86_64
        find . | grep rpm$ | xargs yum -y localinstall
    - name: Install hwloc, utilities.
      run: |
        yum -y install hwloc-devel autoconf automake libtool gcc g++ git make
    - name: Install CUDA
      run: |
        # Fetch the repository definition over HTTPS so it cannot be altered
        # in transit before packages are installed from it.
        ${{ matrix.configmanager }} --add-repo \
          https://developer.download.nvidia.com/compute/cuda/repos/${{ matrix.nvidiadistro }}/x86_64/cuda-${{ matrix.nvidiadistro }}.repo \
          --save
        yum -y clean expire-cache
        yum -y install ${{ matrix.cudapackages }}
    - name: Call `autoreconf -ivf`
      run: |
        ./autogen.sh
    - name: Call `./configure`
      run: |
        ./configure --prefix=/opt/aws-ofi-nccl --with-mpi=/opt/amazon/openmpi \
          --with-libfabric=/opt/amazon/efa \
          --with-cuda=/usr/local/cuda \
          --enable-tests=no \
          --enable-platform-aws
    - name: Call `make`
      # Bound the parallelism to the CPU count, matching the Ubuntu jobs; a
      # bare `-j` places no limit on the number of concurrent jobs.
      run: make -j "$(nproc)"
    - name: Call `make install`
      run: make install
# Job `distcheck` (an entry of the workflow's `jobs:` map).
# Builds the plugin on Ubuntu 22.04 for each compiler / tracing / SDK
# combination in the matrix, then runs `make distcheck`.
distcheck:
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      cc-variant:
        - latest
        - legacy
      cc:
        - gcc
        - clang
      tracing:
        - lttng
        - none
      sdk:
        - cuda
        - neuron
      # Adds `cc-version` to the "latest" combinations only; "legacy"
      # combinations leave it unset.
      include:
        - cc-variant: latest
          cc: clang
          cc-version: 19
        - cc-variant: latest
          cc: gcc
          cc-version: 13
  name: u2204/${{ matrix.sdk }}/libfabric@git/${{matrix.cc}}(${{matrix.cc-variant}})/distcheck/
  steps:
    - uses: actions/checkout@v4
    - name: Configure Neuron SDK Repository
      if: matrix.sdk == 'neuron'
      run: |
        # Configure Linux for Neuron repository updates
        sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null << EOF
        deb https://apt.repos.neuron.amazonaws.com jammy main
        EOF
        wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
        sudo apt update -y
    - name: Add compiler repositories
      if: matrix.cc-variant == 'latest'
      run: |
        if [ "${{ matrix.cc }}" == "clang" ]; then
          wget https://apt.llvm.org/llvm.sh
          chmod +x llvm.sh
          # Delete the last line, allowing us to use the cache below for
          # actually installing the package; this just adds the
          # repository.
          sed -i '$ d' llvm.sh
          sudo ./llvm.sh ${{ matrix.cc-version }}
        fi
        if [ "${{ matrix.cc }}" == "gcc" ]; then
          sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
        fi
    - name: Install Latest Compiler
      uses: awalsh128/cache-apt-pkgs-action@latest
      if: matrix.cc-variant == 'latest'
      with:
        packages: ${{ matrix.cc }}-${{matrix.cc-version}}
        version: compiler-${{ matrix.cc }}-${{matrix.cc-version}}
    - name: Install Base Dependencies
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: ${{ env.APT_PACKAGES }}
        version: base-packages
    - name: Install CUDA SDK
      if: matrix.sdk == 'cuda'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: nvidia-cuda-toolkit
        version: cuda-packages
    - name: Install Neuron SDK
      if: matrix.sdk == 'neuron'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: aws-neuronx-runtime-lib
        version: neuron-packages
    - name: Install lttng
      uses: awalsh128/cache-apt-pkgs-action@latest
      if: matrix.tracing == 'lttng'
      with:
        packages: liblttng-ust-dev
        version: lttng
    - name: Fetch and Install EFA Installer Dependencies
      run: |
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page for tar to choke on.
        curl --fail -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
        tar -xf aws-efa-installer-*.tar.gz
        pushd aws-efa-installer/
        sudo ./efa_installer.sh -y --skip-kmod
        popd
    - name: Build Plugin
      run: |
        set -x
        # When the matrix supplies `cc-version` (the "latest" variant), build
        # with the versioned binary installed above (e.g. gcc-13, clang-19).
        # Previously CC was always the unversioned name, so "latest" and
        # "legacy" used the same default compiler.
        if [ -n "${{ matrix.cc-version }}" ]; then
          export CC="${{ matrix.cc }}-${{ matrix.cc-version }}"
        else
          export CC="${{ matrix.realcc || matrix.cc }}"
        fi
        # actions/checkout@v4 would drop the plugin source in $PWD,
        # so go ahead and build it.
        ./autogen.sh
        if [ "${{ matrix.sdk }}" == "cuda" ]
        then
          ./configure --with-mpi=/opt/amazon/openmpi \
            --with-libfabric=/opt/amazon/efa \
            --with-cuda=/usr/local/cuda/ \
            --enable-tests \
            --enable-platform-aws
        else
          ./configure --with-libfabric=/opt/amazon/efa \
            --enable-neuron \
            --enable-platform-aws
        fi
        make -j "$(nproc)"
    - name: Run Dist Check
      run: make distcheck
    - name: Upload build logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        # upload-artifact@v4 rejects a second upload under an existing name,
        # so the name carries the job and every matrix dimension.
        name: distcheck-${{ matrix.cc }}-${{ matrix.cc-variant }}-${{ matrix.sdk }}-${{ matrix.tracing }}-config.log
        path: config.log
        if-no-files-found: ignore
# Job `unit-tests` (an entry of the workflow's `jobs:` map).
# Builds the plugin with --enable-debug on Ubuntu 22.04 for each compiler /
# SDK combination in the matrix, then runs `make check`.
unit-tests:
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      cc-variant:
        - latest
        - legacy
      cc:
        - gcc
        - clang
      sdk:
        - cuda
        - neuron
      # Adds `cc-version` to the "latest" combinations only; "legacy"
      # combinations leave it unset.
      include:
        - cc-variant: latest
          cc: clang
          cc-version: 19
        - cc-variant: latest
          cc: gcc
          cc-version: 13
  name: u2204/${{ matrix.sdk }}/${{matrix.cc}}(${{matrix.cc-variant}})/unit-tests/
  steps:
    - uses: actions/checkout@v4
    - name: Configure Neuron SDK Repository
      if: matrix.sdk == 'neuron'
      run: |
        # Configure Linux for Neuron repository updates
        sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null << EOF
        deb https://apt.repos.neuron.amazonaws.com jammy main
        EOF
        wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
        sudo apt update -y
    - name: Add compiler repositories
      if: matrix.cc-variant == 'latest'
      run: |
        if [ "${{ matrix.cc }}" == "clang" ]; then
          wget https://apt.llvm.org/llvm.sh
          chmod +x llvm.sh
          # Delete the last line, allowing us to use the cache below for
          # actually installing the package; this just adds the
          # repository.
          sed -i '$ d' llvm.sh
          sudo ./llvm.sh ${{ matrix.cc-version }}
        fi
        if [ "${{ matrix.cc }}" == "gcc" ]; then
          sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
        fi
    - name: Install Latest Compiler
      uses: awalsh128/cache-apt-pkgs-action@latest
      if: matrix.cc-variant == 'latest'
      with:
        packages: ${{ matrix.cc }}-${{matrix.cc-version}}
        version: compiler-${{ matrix.cc }}-${{matrix.cc-version}}
    - name: Install Base Dependencies
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: ${{ env.APT_PACKAGES }}
        version: base-packages
    - name: Install CUDA SDK
      if: matrix.sdk == 'cuda'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: nvidia-cuda-toolkit
        version: cuda-packages
    - name: Install Neuron SDK
      if: matrix.sdk == 'neuron'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        packages: aws-neuronx-runtime-lib
        version: neuron-packages
    - name: Fetch and Install EFA Installer Dependencies
      run: |
        # --fail makes curl exit non-zero on an HTTP error instead of saving
        # the error page for tar to choke on.
        curl --fail -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
        tar -xf aws-efa-installer-*.tar.gz
        pushd aws-efa-installer/
        sudo ./efa_installer.sh -y --skip-kmod
        popd
    - name: Build Plugin
      run: |
        set -x
        # When the matrix supplies `cc-version` (the "latest" variant), build
        # with the versioned binary installed above (e.g. gcc-13, clang-19).
        # Previously CC was always the unversioned name, so "latest" and
        # "legacy" used the same default compiler.
        if [ -n "${{ matrix.cc-version }}" ]; then
          export CC="${{ matrix.cc }}-${{ matrix.cc-version }}"
        else
          export CC="${{ matrix.realcc || matrix.cc }}"
        fi
        # actions/checkout@v4 would drop the plugin source in $PWD,
        # so go ahead and build it.
        ./autogen.sh
        if [ "${{ matrix.sdk }}" == "cuda" ]
        then
          ./configure --with-mpi=/opt/amazon/openmpi \
            --with-libfabric=/opt/amazon/efa \
            --with-cuda=/usr/local/cuda/ \
            --enable-tests \
            --enable-debug \
            --enable-platform-aws
        else
          ./configure --with-libfabric=/opt/amazon/efa \
            --enable-neuron \
            --enable-debug \
            --enable-platform-aws
        fi
        make -j "$(nproc)"
    - name: Run unit tests
      run: make check
    - name: Upload build logs
      if: failure()
      uses: actions/upload-artifact@v4
      with:
        # upload-artifact@v4 rejects a second upload under an existing name;
        # the job prefix keeps this distinct from the distcheck job's log.
        name: unit-tests-${{ matrix.cc }}-${{ matrix.cc-variant }}-${{ matrix.sdk }}-config.log
        path: config.log
        if-no-files-found: ignore
# Job `codechecker` (an entry of the workflow's `jobs:` map).
# Starts after `distcheck`, configures the plugin once per SDK and runs the
# CodeChecker analysis action over a plain `make` build.
codechecker:
  name: CodeChecker - ${{ matrix.sdk }}
  needs: distcheck
  runs-on: ubuntu-22.04
  strategy:
    matrix:
      sdk: [cuda, neuron]
  steps:
    - uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: "3.9"

    - name: Configure Neuron SDK Repository
      if: matrix.sdk == 'neuron'
      run: |
        # Configure Linux for Neuron repository updates
        sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null << EOF
        deb https://apt.repos.neuron.amazonaws.com jammy main
        EOF
        wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
        sudo apt update -y

    # The `version` input differs per package set in each install step below.
    - name: Install Base Dependencies
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        version: base-packages
        packages: ${{ env.APT_PACKAGES }}

    - name: Install CUDA SDK
      if: matrix.sdk == 'cuda'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        version: cuda-packages
        packages: nvidia-cuda-toolkit

    - name: Install Neuron SDK
      if: matrix.sdk == 'neuron'
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        version: neuron-packages
        packages: aws-neuronx-runtime-lib

    - name: Install cppcheck
      uses: awalsh128/cache-apt-pkgs-action@latest
      with:
        version: codechecker-cppcheck
        packages: cppcheck

    - name: Fetch and Install EFA Installer Dependencies
      run: |
        curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
        tar -xf aws-efa-installer-*.tar.gz
        pushd aws-efa-installer/
        sudo ./efa_installer.sh -y --skip-kmod
        popd

    # Only configure here; the build itself is driven by the CodeChecker
    # action through its `build-command`.
    - name: Run Configure
      run: |
        ./autogen.sh
        if [ "${{ matrix.sdk }}" == "neuron" ]; then
          ./configure \
            --with-libfabric="/opt/amazon/efa" \
            --enable-neuron \
            --enable-platform-aws
        else
          ./configure \
            --with-libfabric="/opt/amazon/efa" \
            --with-mpi="/opt/amazon/openmpi" \
            --with-cuda=/usr/local/cuda/ \
            --enable-tests \
            --enable-platform-aws
        fi

    # `id` lets the following steps read this step's outputs.
    - name: Run CodeChecker
      id: codechecker
      uses: whisperity/codechecker-analysis-action@v1
      with:
        version: v6.23.1
        llvm-version: "18"
        install-custom: true
        config: .github/codechecker.yaml
        build-command: make
        ctu: true

    - name: Save CodeChecker HTML output.
      uses: actions/upload-artifact@v4
      with:
        name: CodeChecker Bug Reports for ${{ matrix.sdk }}
        path: ${{ steps.codechecker.outputs.result-html-dir }}/*.html

    # NOTE(review): this step emits an error annotation but then exits 0, so
    # the job still succeeds when warnings are found — confirm that the
    # non-blocking behaviour is intended.
    - name: CodeChecker Pass Or Fail?
      if: steps.codechecker.outputs.warnings-in-diff == 'true'
      shell: bash
      run: |
        echo "::error title=Static Analyzers Failed::Analysed commit(s) caused static analysis warnings"
        exit 0