Commit
[CI] Add e2e CI (#60)
* add e2e pipeline

* install runtime

* fix cuda

* add cancel in progress if new push

* fix some unit tests

* clean up

* add A16

* try to debug github machine nvidia-smi

* force reinstall

* fix runtime cuda install
zhekunz2 authored Sep 15, 2023
1 parent f3420fc commit 616d940
Showing 13 changed files with 129 additions and 25 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/compiler-and-runtime-build.yml
@@ -15,6 +15,14 @@ on:
       - "scripts/frontends/**"
   workflow_dispatch:
 
+# Ensure that only a single job or workflow using the same
+# concurrency group will run at a time. This would cancel
+# any in-progress jobs in the same github workflow and github
+# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   clear_workspace:
     name: Clear workspace
31 changes: 31 additions & 0 deletions .github/workflows/e2e_test.yaml
@@ -0,0 +1,31 @@
+name: e2e Numerical test CI
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+# Ensure that only a single job or workflow using the same
+# concurrency group will run at a time. This would cancel
+# any in-progress jobs in the same github workflow and github
+# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  numerical_e2e_test:
+    name: e2e CI
+    runs-on: self-hosted
+    steps:
+      - name: clear workspace
+        run: rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
+      - name: Checkout byteir repo
+        uses: actions/checkout@v3
+      - name: Build and test e2e
+        run: ./scripts/e2e/build_and_test_e2e.sh ${{ secrets.LLVM_INSTALL_DIR }} ${{ secrets.TORCH_FRONTEND_LLVM_INSTALL_DIR }}
+        shell: bash
8 changes: 8 additions & 0 deletions .github/workflows/torch-frontend-ci.yml
@@ -15,6 +15,14 @@ on:
       - "scripts/frontends/torch-frontend/**"
   workflow_dispatch:
 
+# Ensure that only a single job or workflow using the same
+# concurrency group will run at a time. This would cancel
+# any in-progress jobs in the same github workflow and github
+# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   torch_frontend_build_and_test:
     name: torch-frontend CI
3 changes: 2 additions & 1 deletion compiler/python/byteir/compile.py
@@ -24,7 +24,7 @@ def _detect_cuda_with_nvidia_smi():
     sm_names = {
         "sm_70": ["V100"],
         "sm_75": ["T4", "Quadro T2000"],
-        "sm_80": ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40"],
+        "sm_80": ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40", "A16"],
         "sm_90": ["H100"],
     }
     for sm, names in sm_names.items():
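For context, a minimal sketch of how an nvidia-smi-based detector of this shape can work (an assumption for illustration, not the repo's verbatim implementation): shell out to nvidia-smi, then substring-match the reported GPU name against the table above.

    import subprocess

    def detect_cuda_arch_sketch():
        # Hypothetical stand-in for _detect_cuda_with_nvidia_smi: query the
        # GPU name and map it to a compute-capability string, or None.
        try:
            out = subprocess.check_output(
                ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
                text=True,
            )
        except (OSError, subprocess.CalledProcessError):
            return None  # no driver or no visible GPU on this machine
        sm_names = {
            "sm_70": ["V100"],
            "sm_75": ["T4", "Quadro T2000"],
            "sm_80": ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40", "A16"],
            "sm_90": ["H100"],
        }
        for sm, names in sm_names.items():
            if any(name in out for name in names):
                return sm
        return None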
@@ -222,6 +222,7 @@ def compile_cuda_with_ait(
     if verbose:
         _print_verbose(device_module, "// IR Dump After NVVM Codegen:")
     # write to output device ptx
+    assert _detect_cuda_with_nvidia_smi() is not None
     byteir.translate_to_ptx(device_module.operation, output_file_dir + "/" + output_file_name, _detect_cuda_with_nvidia_smi())
 
     with context:
30 changes: 30 additions & 0 deletions scripts/e2e/build_and_test_e2e.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+set -e
+set -x
+
+CUR_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
+# path to byteir root
+ROOT_PROJ_DIR="$CUR_DIR/../.."
+
+LLVM_INSTALL_DIR="$1"
+
+TORCH_FRONTEND_LLVM_INSTALL_DIR="$2"
+
+pushd "$ROOT_PROJ_DIR"
+# build compiler
+bash scripts/compiler/build_and_lit_test.sh "$LLVM_INSTALL_DIR"
+# build runtime
+bash scripts/runtime/build_and_test.sh --cuda --python --no-test "$LLVM_INSTALL_DIR"
+# build torch_frontend
+bash scripts/frontends/torch-frontend/build_and_test.sh "$TORCH_FRONTEND_LLVM_INSTALL_DIR"
+
+pip3 install "$ROOT_PROJ_DIR"/external/AITemplate/python/dist/*.whl --force-reinstall
+pip3 install "$ROOT_PROJ_DIR"/compiler/build/python/dist/*.whl --force-reinstall
+pip3 install "$ROOT_PROJ_DIR"/runtime/python/dist/*.whl --force-reinstall
+pip3 install "$ROOT_PROJ_DIR"/frontends/torch-frontend/build/torch-frontend/python/dist/*.whl --force-reinstall
+pip3 install -r "$ROOT_PROJ_DIR/frontends/torch-frontend/torch-requirements.txt"
+
+python3 tests/numerical_test/main.py
+rm -rf ./local_test
+popd
7 changes: 7 additions & 0 deletions scripts/runtime/build_and_test.sh
@@ -71,6 +71,13 @@ cmake -GNinja \
 
 cmake --build "$BUILD_DIR" --target all --target install
 
+if [[ $BRT_ENABLE_PYTHON_BINDINGS == "ON" ]]; then
+  pushd "$PROJ_DIR/python"
+  # note: python packaging depends on `--target install`
+  python3 setup.py bdist_wheel
+  popd
+fi
+
 if [[ $BRT_USE_CUDA == "ON" ]] && [[ $BRT_ENABLE_ASAN == "ON" ]]; then
   export ASAN_OPTIONS=protect_shadow_gap=0
 fi
1 change: 1 addition & 0 deletions tests/numerical_test/execute.py
@@ -86,6 +86,7 @@ def get_entry_func_name(interp):
 
 
 def compile_and_run_mlir(mhlo_file, target):
+    np.random.seed(0)
     try:
         interp = Interpreter.load_from_file(mhlo_file)
         np_inputs = generate_np_inputs(interp)
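Seeding NumPy's global RNG pins the random inputs drawn by generate_np_inputs, so the golden and compiled executions of a test compare like-for-like data. A quick illustration of the property relied on:

    import numpy as np

    np.random.seed(0)
    golden_input = np.random.rand(2, 3).astype(np.float32)
    np.random.seed(0)
    compiled_input = np.random.rand(2, 3).astype(np.float32)
    # identical seed => bit-identical inputs for both runs
    assert np.array_equal(golden_input, compiled_input)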
12 changes: 6 additions & 6 deletions tests/numerical_test/main.py
@@ -40,10 +40,10 @@
 
 SM80_PLUS_TESTS = [
     "dot_f32.mlir",
-    "MatmulF32Module_basic",
-    "bmm_rrr_add_f32.mlir",
-    "bmm_rrr_f32.mlir",
     "bmm_rrr_permute_f32.mlir",
+    "MatmulF32Module_basic",
+    "BatchMatmulAddF32Module_basic",
+    "BatchMatmulF32Module",
 ]
 
 
@@ -59,7 +59,7 @@ def _detect_cuda_with_nvidia_smi():
     sm_names = {
         70: ["V100"],
         75: ["T4", "Quadro T2000"],
-        80: ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40"],
+        80: ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40", "A16"],
         90: ["H100"],
     }
     for sm, names in sm_names.items():
@@ -120,8 +120,8 @@ def main():
     elif args.config == 'torch':
         results = run_torch_test(arch)
         failed = report_results(results)
-        # TODO(zzk): use test infra for dynamo tests
-        run_torch_dynamo_tests(arch)
+        # TODO(zzk): disable flash attn test for now
+        # run_torch_dynamo_tests(arch)
     sys.exit(1 if failed else 0)
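Together with the detector above, SM80_PLUS_TESTS presumably gates the matmul-heavy cases to sm_80-or-newer GPUs. A hedged sketch of such a filter (the helper name and exact predicate are assumptions):

    def should_run(test_name, arch):
        # Skip tests that need sm_80+ kernels when the detected
        # compute capability is older or unknown.
        if test_name in SM80_PLUS_TESTS and (arch is None or arch < 80):
            return False
        return True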
6 changes: 0 additions & 6 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_add_f32.mlir

This file was deleted.

4 changes: 0 additions & 4 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_f32.mlir

This file was deleted.

10 changes: 5 additions & 5 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_permute_f32.mlir
@@ -1,6 +1,6 @@
-func.func @bmm_rrr(%arg0 : tensor<12x256x256xf32>, %arg1 : tensor<12x256x64xf32>) -> tensor<1x256x12x64xf32> {
-  %0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = #mhlo.dot<lhs_batching_dimensions = [0], rhs_batching_dimensions = [0], lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [1]>} : (tensor<12x256x256xf32>, tensor<12x256x64xf32>) -> tensor<12x256x64xf32>
-  %1 = mhlo.reshape %0 : (tensor<12x256x64xf32>) -> tensor<1x12x256x64xf32>
-  %2 = "mhlo.transpose"(%1) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x12x256x64xf32>) -> tensor<1x256x12x64xf32>
-  return %2 : tensor<1x256x12x64xf32>
+func.func @bmm_rrr_permute_f32(%arg0: tensor<4x2x2xf32>, %arg1: tensor<4x2x2xf32>) -> tensor<2x2x2x2xf32> {
+  %0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = #mhlo.dot<lhs_batching_dimensions = [0], rhs_batching_dimensions = [0], lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [1]>} : (tensor<4x2x2xf32>, tensor<4x2x2xf32>) -> tensor<4x2x2xf32>
+  %1 = mhlo.reshape %0 : (tensor<4x2x2xf32>) -> tensor<2x2x2x2xf32>
+  %2 = "mhlo.transpose"(%1) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32>
+  return %2 : tensor<2x2x2x2xf32>
 }
6 changes: 3 additions & 3 deletions tests/numerical_test/mlir_tests/ops/dot_f32.mlir
@@ -1,4 +1,4 @@
-func.func @gemm_rrr_f32(%arg0 : tensor<256x256xf32>, %arg1 : tensor<256x256xf32>) -> tensor<256x256xf32> {
-  %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<256x256xf32>, tensor<256x256xf32>) -> tensor<256x256xf32>
-  return %0 : tensor<256x256xf32>
+func.func @gemm_rrr_f32(%arg0 : tensor<4x4xf32>, %arg1 : tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
+  return %0 : tensor<4x4xf32>
 }
28 changes: 28 additions & 0 deletions tests/numerical_test/torch_e2e_testing/test_suite/basic.py
@@ -58,3 +58,31 @@ def forward(self, a, b):
 @register_test_case(module_factory=lambda: MatmulF32Module())
 def MatmulF32Module_basic(module, tu: TestUtils):
     module.forward(tu.rand(5, 6), tu.rand(6, 10))
+
+
+class BatchMatmulF32Module(torch.nn.Module):
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, a, b):
+        return torch.bmm(a, b)
+
+
+@register_test_case(module_factory=lambda: BatchMatmulF32Module())
+def BatchMatmulF32Module_basic(module, tu: TestUtils):
+    module.forward(tu.rand(2, 5, 6), tu.rand(2, 6, 10))
+
+
+class BatchMatmulAddF32Module(torch.nn.Module):
+
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, a, b, c):
+        return c + torch.bmm(a, b)
+
+
+@register_test_case(module_factory=lambda: BatchMatmulAddF32Module())
+def BatchMatmulAddF32Module_basic(module, tu: TestUtils):
+    module.forward(tu.rand(2, 5, 6), tu.rand(2, 6, 10), tu.rand(2, 5, 10))
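These cases lean on the suite's registration decorator and TestUtils helper. A minimal sketch of plausible definitions (assumed shapes; the real ones live in the torch_e2e_testing framework):

    import torch

    GLOBAL_TEST_REGISTRY = []

    class TestUtils:
        def rand(self, *sizes):
            # float32 tensors, matching the tu.rand(...) calls above
            return torch.rand(*sizes)

    def register_test_case(module_factory):
        def decorator(test_func):
            # record (name, factory, test) so the runner can invoke it by name
            GLOBAL_TEST_REGISTRY.append((test_func.__name__, module_factory, test_func))
            return test_func
        return decorator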
