Skip to content

Commit

Permalink
Merge branch 'master' into tohtana/support_autocast
Browse files (browse the repository at this point in the history)
  • Loading branch information
tohtana authored Feb 28, 2025
2 parents 22e5ba9 + 02bbf50 commit 453cc16
Show file tree
Hide file tree
Showing 63 changed files with 1,080 additions and 333 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cpu-torch-latest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
git clone https://github.com/huggingface/transformers
cd transformers
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 6c3f168b3
git checkout 981c276
git rev-parse --short HEAD
pip install .
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/hpu-gaudi2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ jobs:
git clone https://github.com/huggingface/transformers
cd transformers
# if needed switch to the last known good SHA until transformers@master is fixed
git checkout 6c3f168b3
git checkout 981c276
git rev-parse --short HEAD
pip install .
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/no-torch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,12 @@ jobs:
run: |
pip uninstall torch --yes
pip install setuptools
pip install build
pip list
- name: Build deepspeed
run: |
DS_BUILD_STRING=" " python setup.py sdist
DS_BUILD_STRING=" " python -m build --sdist
- name: Open GitHub issue if nightly CI fails
if: ${{ failure() && (github.event_name == 'schedule') }}
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/nv-a6000.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ jobs:
run: |
git clone https://github.com/huggingface/transformers
cd transformers
git checkout v4.47.1
# if you need to use an older transformers version temporarily in case of breakage
git checkout 981c276
git rev-parse --short HEAD
python -m pip install .
- name: Install deepspeed
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nv-accelerate-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ concurrency:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -29,7 +29,7 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/nv-ds-chat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ permissions:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -37,13 +37,13 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
- name: Install deepspeed
run: |
pip install transformers
pip install transformers==4.48.3
pip install .[dev]
ds_report
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/nv-inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ concurrency:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -32,7 +32,7 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch==2.1.2 torchvision==0.16.2 --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -58,8 +58,8 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
#pytest $PYTEST_OPTS -m 'seq_inference' unit/ --torch_ver="2.1" --cuda_ver="12.1"
pytest $PYTEST_OPTS -m 'inference_ops' unit/ --torch_ver="2.1" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -n 4 -m 'inference' unit/ --torch_ver="2.1" --cuda_ver="12.1"
#pytest $PYTEST_OPTS -m 'seq_inference' unit/ --torch_ver="2.1" --cuda_ver="12.4"
pytest $PYTEST_OPTS -m 'inference_ops' unit/ --torch_ver="2.1" --cuda_ver="12.4"
pytest $PYTEST_OPTS --forked -n 4 -m 'inference' unit/ --torch_ver="2.1" --cuda_ver="12.4"
# run ds_report again to check updated op list
ds_report
4 changes: 2 additions & 2 deletions .github/workflows/nv-lightning-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ concurrency:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -29,7 +29,7 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/nv-mii.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ concurrency:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -37,7 +37,7 @@ jobs:

- name: Install pytorch
run: |
pip3 install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip3 install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/nv-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ permissions:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -28,7 +28,7 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -m 'nightly' unit/ --torch_ver="2.6" --cuda_ver="12.4"
- name: Open GitHub issue if nightly CI fails
if: ${{ failure() && (github.event_name == 'schedule') }}
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/nv-torch-latest-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ concurrency:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -29,7 +29,7 @@ jobs:

- name: Install pytorch
run: |
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu121
pip install -U --cache-dir $TORCH_CACHE torch torchvision --index-url https://download.pytorch.org/whl/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -38,7 +38,7 @@ jobs:
git clone https://github.com/huggingface/transformers
cd transformers
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 6c3f168b3
git checkout 981c276
git rev-parse --short HEAD
pip install .
Expand All @@ -55,5 +55,5 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.5" --cuda_ver="12.1"
pytest $PYTEST_OPTS --forked -n 8 unit/ --torch_ver="2.6" --cuda_ver="12.4"
pytest $PYTEST_OPTS --forked -m 'sequential' unit/ --torch_ver="2.6" --cuda_ver="12.4"
14 changes: 7 additions & 7 deletions .github/workflows/nv-torch-nightly-v100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ name: nv-torch-nightly-v100

on:
workflow_dispatch:
pull_request:
paths:
- '.github/workflows/nv-torch-nightly-v100.yml'
schedule:
- cron: "0 0 * * *"
pull_request:
paths:
- '.github/workflows/nv-torch-nightly-v100.yml'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand All @@ -18,7 +18,7 @@ permissions:

jobs:
unit-tests:
runs-on: [self-hosted, nvidia, cu121, v100]
runs-on: [self-hosted, nvidia, cu124, v100]

steps:
- uses: actions/checkout@v4
Expand All @@ -28,7 +28,7 @@ jobs:

- name: Install pytorch
run: |
pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
python -c "import torch; print('torch:', torch.__version__, torch)"
python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
Expand All @@ -37,7 +37,7 @@ jobs:
git clone https://github.com/huggingface/transformers
cd transformers
# if needed switch to the last known good SHA until transformers@master is fixed
# git checkout 6c3f168b3
git checkout 981c276
git rev-parse --short HEAD
pip install .
Expand All @@ -54,7 +54,7 @@ jobs:
run: |
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
pytest $PYTEST_OPTS --forked -n 4 unit/
pytest $PYTEST_OPTS --forked -n 8 unit/
pytest $PYTEST_OPTS --forked -m 'sequential' unit/
- name: Open GitHub issue if nightly CI fails
Expand Down
65 changes: 0 additions & 65 deletions .github/workflows/nv-torch110-p40.yml

This file was deleted.

66 changes: 0 additions & 66 deletions .github/workflows/nv-torch110-v100.yml

This file was deleted.

Loading

0 comments on commit 453cc16

Please sign in to comment.