[CI] Add e2e CI #60

Merged 10 commits on Sep 15, 2023
8 changes: 8 additions & 0 deletions .github/workflows/compiler-and-runtime-build.yml
@@ -15,6 +15,14 @@ on:
- "scripts/frontends/**"
workflow_dispatch:

# Ensure that only a single job or workflow using the same
# concurrency group will run at a time. This would cancel
# any in-progress jobs in the same github workflow and github
# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
clear_workspace:
name: Clear workspace
31 changes: 31 additions & 0 deletions .github/workflows/e2e_test.yaml
@@ -0,0 +1,31 @@
name: e2e Numerical test CI

on:
push:
branches:
- main
pull_request:
branches:
- main
workflow_dispatch:

# Ensure that only a single job or workflow using the same
# concurrency group will run at a time. This would cancel
# any in-progress jobs in the same github workflow and github
# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
numerical_e2e_test:
name: e2e CI
runs-on: self-hosted
steps:
- name: clear workspace
run: rm -rf $GITHUB_WORKSPACE && mkdir $GITHUB_WORKSPACE
- name: Checkout byteir repo
uses: actions/checkout@v3
- name: Build and test e2e
run: ./scripts/e2e/build_and_test_e2e.sh ${{ secrets.LLVM_INSTALL_DIR }} ${{ secrets.TORCH_FRONTEND_LLVM_INSTALL_DIR }}
shell: bash
8 changes: 8 additions & 0 deletions .github/workflows/torch-frontend-ci.yml
@@ -15,6 +15,14 @@ on:
- "scripts/frontends/torch-frontend/**"
workflow_dispatch:

# Ensure that only a single job or workflow using the same
# concurrency group will run at a time. This would cancel
# any in-progress jobs in the same github workflow and github
# ref (e.g. refs/heads/main or refs/pull/<pr_number>/merge).
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
torch_frontend_build_and_test:
name: torch-frontend CI
3 changes: 2 additions & 1 deletion compiler/python/byteir/compile.py
@@ -24,7 +24,7 @@ def _detect_cuda_with_nvidia_smi():
sm_names = {
"sm_70": ["V100"],
"sm_75": ["T4", "Quadro T2000"],
"sm_80": ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40"],
"sm_80": ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40", "A16"],
"sm_90": ["H100"],
}
for sm, names in sm_names.items():
@@ -222,6 +222,7 @@ def compile_cuda_with_ait(
if verbose:
_print_verbose(device_module, "// IR Dump After NVVM Codegen:")
# write to output device ptx
assert _detect_cuda_with_nvidia_smi() is not None
byteir.translate_to_ptx(device_module.operation, output_file_dir + "/" + output_file_name, _detect_cuda_with_nvidia_smi())

with context:
30 changes: 30 additions & 0 deletions scripts/e2e/build_and_test_e2e.sh
@@ -0,0 +1,30 @@
#!/bin/bash

set -e
set -x

CUR_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# path to byteir root
ROOT_PROJ_DIR="$CUR_DIR/../.."

LLVM_INSTALL_DIR="$1"

TORCH_FRONTEND_LLVM_INSTALL_DIR="$2"

pushd $ROOT_PROJ_DIR
# build compiler
bash scripts/compiler/build_and_lit_test.sh $LLVM_INSTALL_DIR
# build runtime
bash scripts/runtime/build_and_test.sh --cuda --python --no-test $LLVM_INSTALL_DIR
# build torch_frontend
bash scripts/frontends/torch-frontend/build_and_test.sh $TORCH_FRONTEND_LLVM_INSTALL_DIR

pip3 install $ROOT_PROJ_DIR/external/AITemplate/python/dist/*.whl --force-reinstall
pip3 install $ROOT_PROJ_DIR/compiler/build/python/dist/*.whl --force-reinstall
pip3 install $ROOT_PROJ_DIR/runtime/python/dist/*.whl --force-reinstall
pip3 install $ROOT_PROJ_DIR/frontends/torch-frontend/build/torch-frontend/python/dist/*.whl --force-reinstall
pip3 install -r $ROOT_PROJ_DIR/frontends/torch-frontend/torch-requirements.txt

python3 tests/numerical_test/main.py
rm -rf ./local_test
popd
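
For reference, the "Build and test e2e" step in e2e_test.yaml above passes two repository secrets to this script as positional arguments (the LLVM install directories). A local run would look roughly like the sketch below; the install paths are hypothetical placeholders, not values from this PR:

    # from the byteir repo root; both install paths are made-up examples
    bash scripts/e2e/build_and_test_e2e.sh /opt/llvm-install /opt/torch-frontend-llvm-install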
7 changes: 7 additions & 0 deletions scripts/runtime/build_and_test.sh
@@ -71,6 +71,13 @@ cmake -GNinja \

cmake --build "$BUILD_DIR" --target all --target install

if [[ $BRT_ENABLE_PYTHON_BINDINGS == "ON" ]]; then
pushd $PROJ_DIR/python
# note: python packaging depends on `--target install`
python3 setup.py bdist_wheel
popd
fi

if [[ $BRT_USE_CUDA == "ON" ]] && [[ $BRT_ENABLE_ASAN == "ON" ]]; then
export ASAN_OPTIONS=protect_shadow_gap=0
fi
1 change: 1 addition & 0 deletions tests/numerical_test/execute.py
@@ -86,6 +86,7 @@ def get_entry_func_name(interp):


def compile_and_run_mlir(mhlo_file, target):
np.random.seed(0)
try:
interp = Interpreter.load_from_file(mhlo_file)
np_inputs = generate_np_inputs(interp)
12 changes: 6 additions & 6 deletions tests/numerical_test/main.py
@@ -40,10 +40,10 @@

SM80_PLUS_TESTS = [
"dot_f32.mlir",
"MatmulF32Module_basic",
"bmm_rrr_add_f32.mlir",
"bmm_rrr_f32.mlir",
"bmm_rrr_permute_f32.mlir",
"MatmulF32Module_basic",
"BatchMatmulAddF32Module_basic",
"BatchMatmulF32Module",
]


@@ -59,7 +59,7 @@ def _detect_cuda_with_nvidia_smi():
sm_names = {
70: ["V100"],
75: ["T4", "Quadro T2000"],
80: ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40"],
80: ["PG509", "A100", "A10", "RTX 30", "A30", "RTX 40", "A16"],
90: ["H100"],
}
for sm, names in sm_names.items():
@@ -120,8 +120,8 @@ def main():
elif args.config == 'torch':
results = run_torch_test(arch)
failed = report_results(results)
# TODO(zzk): use test infra for dynamo tests
run_torch_dynamo_tests(arch)
# TODO(zzk): disable flash attn test for now
# run_torch_dynamo_tests(arch)
sys.exit(1 if failed else 0)


6 changes: 0 additions & 6 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_add_f32.mlir

This file was deleted.

4 changes: 0 additions & 4 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_f32.mlir

This file was deleted.

10 changes: 5 additions & 5 deletions tests/numerical_test/mlir_tests/ops/bmm_rrr_permute_f32.mlir
@@ -1,6 +1,6 @@
func.func @bmm_rrr(%arg0 : tensor<12x256x256xf32>, %arg1 : tensor<12x256x64xf32>) -> tensor<1x256x12x64xf32> {
%0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = #mhlo.dot<lhs_batching_dimensions = [0], rhs_batching_dimensions = [0], lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [1]>} : (tensor<12x256x256xf32>, tensor<12x256x64xf32>) -> tensor<12x256x64xf32>
%1 = mhlo.reshape %0 : (tensor<12x256x64xf32>) -> tensor<1x12x256x64xf32>
%2 = "mhlo.transpose"(%1) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<1x12x256x64xf32>) -> tensor<1x256x12x64xf32>
return %2 : tensor<1x256x12x64xf32>
func.func @bmm_rrr_permute_f32(%arg0: tensor<4x2x2xf32>, %arg1: tensor<4x2x2xf32>) -> tensor<2x2x2x2xf32> {
%0 = "mhlo.dot_general"(%arg0, %arg1) {dot_dimension_numbers = #mhlo.dot<lhs_batching_dimensions = [0], rhs_batching_dimensions = [0], lhs_contracting_dimensions = [2], rhs_contracting_dimensions = [1]>} : (tensor<4x2x2xf32>, tensor<4x2x2xf32>) -> tensor<4x2x2xf32>
%1 = mhlo.reshape %0 : (tensor<4x2x2xf32>) -> tensor<2x2x2x2xf32>
%2 = "mhlo.transpose"(%1) {permutation = dense<[0, 2, 1, 3]> : tensor<4xi64>} : (tensor<2x2x2x2xf32>) -> tensor<2x2x2x2xf32>
return %2 : tensor<2x2x2x2xf32>
}
6 changes: 3 additions & 3 deletions tests/numerical_test/mlir_tests/ops/dot_f32.mlir
@@ -1,4 +1,4 @@
func.func @gemm_rrr_f32(%arg0 : tensor<256x256xf32>, %arg1 : tensor<256x256xf32>) -> tensor<256x256xf32> {
%0 = "mhlo.dot"(%arg0, %arg1) : (tensor<256x256xf32>, tensor<256x256xf32>) -> tensor<256x256xf32>
return %0 : tensor<256x256xf32>
func.func @gemm_rrr_f32(%arg0 : tensor<4x4xf32>, %arg1 : tensor<4x4xf32>) -> tensor<4x4xf32> {
%0 = "mhlo.dot"(%arg0, %arg1) : (tensor<4x4xf32>, tensor<4x4xf32>) -> tensor<4x4xf32>
return %0 : tensor<4x4xf32>
}
28 changes: 28 additions & 0 deletions tests/numerical_test/torch_e2e_testing/test_suite/basic.py
@@ -58,3 +58,31 @@ def forward(self, a, b):
@register_test_case(module_factory=lambda: MatmulF32Module())
def MatmulF32Module_basic(module, tu: TestUtils):
module.forward(tu.rand(5, 6), tu.rand(6, 10))


class BatchMatmulF32Module(torch.nn.Module):

def __init__(self):
super().__init__()

def forward(self, a, b):
return torch.bmm(a, b)


@register_test_case(module_factory=lambda: BatchMatmulF32Module())
def BatchMatmulF32Module_basic(module, tu: TestUtils):
module.forward(tu.rand(2, 5, 6), tu.rand(2, 6, 10))


class BatchMatmulAddF32Module(torch.nn.Module):

def __init__(self):
super().__init__()

def forward(self, a, b, c):
return c + torch.bmm(a, b)


@register_test_case(module_factory=lambda: BatchMatmulAddF32Module())
def BatchMatmulAddF32Module_basic(module, tu: TestUtils):
module.forward(tu.rand(2, 5, 6), tu.rand(2, 6, 10), tu.rand(2, 5, 10))