Skip to content

Commit

Permalink
add stable diffusion CI workflow
Browse files: browse the repository at this point in the history
  • Loading branch information
mrwyattii committed Feb 14, 2024
1 parent 8e0c7f1 commit 4c6a302
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 10 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/nv-a6000-fastgen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ on:
paths-ignore:
- 'mii/legacy/**'
- 'tests/legacy/**'
- '.github/workflows/nv-torch-latest-v100.yml'
- '.github/workflows/nv-v100-legacy.yml'
- '.github/workflows/nv-a6000-sd.yml'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
58 changes: 58 additions & 0 deletions .github/workflows/nv-a6000-sd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: nv-a6000-sd

# Runs the legacy MII tests marked "stable_diffusion" on a self-hosted A6000 runner.
on:
  # Allow manual runs.
  workflow_dispatch:
  schedule:
    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"
  pull_request:
    # Only PRs touching the legacy code, its tests, or this workflow trigger a run.
    paths:
      - 'mii/legacy/**'
      - 'tests/legacy/**'
      - '.github/workflows/nv-a6000-sd.yml'

# One active run per workflow and ref; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    runs-on: [self-hosted, nvidia, a6000]
    container:
      image: nvcr.io/nvidia/pytorch:23.03-py3
      ports:
        - 80
      options: --gpus all --shm-size "8G"

    steps:
      - uses: actions/checkout@v4

      # Print toolchain, driver and torch details so failures can be diagnosed from the log.
      - name: Check container state
        run: |
          ldd --version
          nvcc --version
          nvidia-smi
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Install transformers from the tip of its default branch; log the commit that was used.
      - name: Install transformers
        run: |
          git clone --depth=1 https://github.com/huggingface/transformers
          cd transformers
          git rev-parse --short HEAD
          python -m pip install .

      # Install DeepSpeed from the tip of its default branch, then print its environment report.
      - name: Install deepspeed
        run: |
          git clone --depth=1 https://github.com/microsoft/DeepSpeed
          cd DeepSpeed
          python -m pip install .
          ds_report

      # Use `python -m pip` like the steps above so MII is installed into the same
      # interpreter that later runs pytest. The requirement is quoted so the shell
      # can never treat `[dev]` as a glob character class.
      - name: Install MII
        run: |
          python -m pip install ".[dev]"

      - name: Python environment
        run: |
          python -m pip list

      # Run only the tests carrying the "stable_diffusion" marker.
      - name: Unit tests
        run: |
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests/legacy
          python -m pytest --color=yes --durations=0 --verbose -rF -m "stable_diffusion" ./
2 changes: 1 addition & 1 deletion .github/workflows/nv-v100-legacy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
- 'mii/__init__.py'
- 'mii/legacy/**'
- 'tests/legacy/**'
- '.github/workflows/nv-torch-latest-v100.yml'
- '.github/workflows/nv-v100-legacy.yml'
- 'requirements/**'
- 'setup.py'

Expand Down
1 change: 1 addition & 0 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
clang-format==16.0.2
diffusers
pre-commit>=2.20.0
pytest
pytest-forked
35 changes: 29 additions & 6 deletions tests/legacy/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import os
import mii.legacy as mii
from types import SimpleNamespace
from packaging import version as pkg_version
import torch


@pytest.fixture(scope="function", params=["fp16"])
Expand Down Expand Up @@ -84,11 +86,9 @@ def ds_config(request):
return request.param


@pytest.fixture(scope="function")
def replace_with_kernel_inject(model_name):
    """Return False for model names containing "clip-vit", True for all others."""
    return "clip-vit" not in model_name
@pytest.fixture(scope="function", params=[True])
def replace_with_kernel_inject(request):
    """Provide the kernel-injection flag for a test.

    The fixture's own parameter list is ``[True]``; the value handed back is
    whatever parameter pytest supplies through ``request``.
    """
    kernel_inject = request.param
    return kernel_inject


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -145,8 +145,31 @@ def expected_failure(request):
return request.param


@pytest.fixture(scope="function", params=[None])
def min_compute_capability(request):
    """Provide the minimum compute capability a test requires.

    The fixture's own parameter list is ``[None]``; the value handed back is
    whatever parameter pytest supplies through ``request``.
    """
    required_capability = request.param
    return required_capability


@pytest.fixture(scope="function")
def meets_compute_capability_reqs(min_compute_capability):
    """Skip the requesting test when the GPU does not meet its minimum compute capability.

    Args:
        min_compute_capability: Minimum capability required by the test
            (anything whose ``str()`` parses as a version, e.g. ``8``), or
            ``None`` when the test has no requirement.

    Returns:
        None. The fixture is requested only for its skip side effect.
    """
    if min_compute_capability is None:
        return
    min_compute_ver = pkg_version.parse(str(min_compute_capability))
    # Without a usable CUDA device the capability query below raises; the
    # requirement cannot be met, so skip instead of erroring out.
    if not torch.cuda.is_available():
        pytest.skip(
            f"Skipping test because no CUDA device is available to check the minimum required compute capability ({min_compute_ver})."
        )
    # get_device_capability() returns (major, minor); join them as "major.minor"
    # so both sides are compared as versions.
    device_compute_ver = pkg_version.parse(".".join(
        map(str,
            torch.cuda.get_device_capability())))
    if device_compute_ver < min_compute_ver:
        pytest.skip(
            f"Skipping test because device compute capability ({device_compute_ver}) is less than the minimum required ({min_compute_ver})."
        )


@pytest.fixture(scope="function")
def deployment(deployment_name, mii_config, model_config, expected_failure):
def deployment(deployment_name,
mii_config,
model_config,
expected_failure,
meets_compute_capability_reqs):
if expected_failure is not None:
with pytest.raises(expected_failure) as excinfo:
mii.deploy(
Expand Down
2 changes: 1 addition & 1 deletion tests/legacy/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[pytest]
markers =
deepspeed:Run test for deepspeed CI
stable_diffusion:Run Stable Diffusion tests
24 changes: 23 additions & 1 deletion tests/legacy/test_local_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def test_single_GPU(deployment, query):


@pytest.mark.parametrize(
"task_name, model_name, query",
"task_name, model_name, query, tensor_parallel",
[
(
"text-generation",
Expand All @@ -92,6 +92,7 @@ def test_single_GPU(deployment, query):
"query": ["DeepSpeed is the greatest",
"Seattle is"]
},
2,
),
],
)
Expand Down Expand Up @@ -121,3 +122,24 @@ def test_session(deployment, query):
result = generator.query(query)
generator.destroy_session(session_name)
assert result


@pytest.mark.stable_diffusion
@pytest.mark.parametrize(
    "task_name, model_name, query",
    [
        ("text-to-image",
         "openskyml/midjourney-mini",
         {"query": ["a dog on a rocket"]}),
    ],
)
@pytest.mark.parametrize("min_compute_capability", [8])
def test_stable_diffusion(deployment, query):
    """Query a deployed text-to-image model and check the response is truthy.

    The test is parametrized with a minimum compute capability of 8.
    """
    # Echo the deployment value into the captured test output.
    print(deployment)
    query_handle = mii.mii_query_handle(deployment)
    response = query_handle.query(query)
    assert response

0 comments on commit 4c6a302

Please sign in to comment.