From 46a5792915666d818dcd1a0e7f91684c3f3f75d9 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 14 Aug 2024 01:35:01 +0900 Subject: [PATCH] Bug Fix - Update Docker Exec Command for Persistent HPCX Environment (#635) Add 10-hpcx.sh to /etc/profile.d Update the Docker exec command to ensure a persistent HPCX environment. --- dockerfile/cuda12.2.dockerfile | 2 +- dockerfile/cuda12.4.dockerfile | 2 +- superbench/runner/runner.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dockerfile/cuda12.2.dockerfile b/dockerfile/cuda12.2.dockerfile index 9c58c0eca..b53fe1c7c 100644 --- a/dockerfile/cuda12.2.dockerfile +++ b/dockerfile/cuda12.2.dockerfile @@ -131,7 +131,7 @@ ENV PATH="${PATH}" \ RUN echo PATH="$PATH" > /etc/environment && \ echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \ echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment && \ - echo "source /opt/hpcx/hpcx-init.sh && hpcx_load" >> /etc/bash.bashrc + echo "source /opt/hpcx/hpcx-init.sh && hpcx_load" | tee -a /etc/bash.bashrc >> /etc/profile.d/10-hpcx.sh # Add config files ADD dockerfile/etc /opt/microsoft/ diff --git a/dockerfile/cuda12.4.dockerfile b/dockerfile/cuda12.4.dockerfile index 777f9a401..984c6ee01 100644 --- a/dockerfile/cuda12.4.dockerfile +++ b/dockerfile/cuda12.4.dockerfile @@ -125,7 +125,7 @@ ENV PATH="${PATH}" \ RUN echo PATH="$PATH" > /etc/environment && \ echo LD_LIBRARY_PATH="$LD_LIBRARY_PATH" >> /etc/environment && \ echo SB_MICRO_PATH="$SB_MICRO_PATH" >> /etc/environment && \ - echo "source /opt/hpcx/hpcx-init.sh && hpcx_load" >> /etc/bash.bashrc + echo "source /opt/hpcx/hpcx-init.sh && hpcx_load" | tee -a /etc/bash.bashrc >> /etc/profile.d/10-hpcx.sh # Add config files ADD dockerfile/etc /opt/microsoft/ diff --git a/superbench/runner/runner.py b/superbench/runner/runner.py index cd0c8c4dc..a1d3e50c6 100644 --- a/superbench/runner/runner.py +++ b/superbench/runner/runner.py @@ -205,7 +205,7 @@ def run_sys_info(self): logger.info('Runner is going to get node system info.') - fcmd = "docker exec sb-workspace bash -c '{command}'" + fcmd = "docker exec sb-workspace bash -lc '{command}'" if 'skip' not in self._docker_config: self._docker_config.skip = False @@ -462,7 +462,7 @@ def _run_proc(self, benchmark_name, mode, vars): envvar = f'{k}={str(v).format(proc_rank=mode.proc_rank, proc_num=mode.proc_num)}' env_list += f' -e {envvar}' if not self._docker_config.skip else f' && export {envvar}' - fcmd = "docker exec {env_list} sb-workspace bash -c '{command}'" + fcmd = "docker exec {env_list} sb-workspace bash -lc '{command}'" if self._docker_config.skip: fcmd = "bash -c '{env_list} && cd $SB_WORKSPACE && {command}'" ansible_runner_config = self._ansible_client.get_shell_config(