Commit 459f92c

SYCL: Alternative solution to avoid runtime error of launching kernel on xpu:1 when querying SYCL kernel bundle ahead (#769)

Previously, the SYCL kernel bundle was queried against the context only, so
the kernel was built just for the first device; launching it on any other
device (e.g. xpu:1) raised a runtime error. As a temporary workaround, this
change passes the target device explicitly when querying the kernel bundle,
giving the SYCL runtime an extra hint to build the kernel for that device
as well.
intel/llvm#15127
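In essence, the change replaces the context-only bundle query with a
device-qualified one. A minimal sketch of the two calls (assuming a SYCL
queue `q` and a kernel class `KernelClass`, following the patch below):

    // Before: the bundle is resolved against the context only, so the
    // kernel may be compiled just for the context's first device.
    auto ctx = q.get_context();
    auto kid = ::sycl::get_kernel_id<KernelClass>();
    auto before = ::sycl::get_kernel_bundle<::sycl::bundle_state::executable>(
        ctx, {kid});

    // After: passing the queue's device narrows the query and hints the
    // runtime to build the kernel for that device as well.
    auto dev = q.get_device();
    auto after = ::sycl::get_kernel_bundle<::sycl::bundle_state::executable>(
        ctx, {dev}, {kid});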

---------

Signed-off-by: Feng Yuan <[email protected]>
Co-authored-by: Wang, Chuanqi <[email protected]>
fengyuan14 and chuanqi129 authored Sep 29, 2024
1 parent 151a19e commit 459f92c
Showing 27 changed files with 58 additions and 29 deletions.
24 changes: 19 additions & 5 deletions .github/workflows/_linux_ut.yml
@@ -22,7 +22,7 @@ on:
         required: true
         type: string
         default: ''
-        description: UT scope. `op_example,op_extended,op_ut,torch_xpu` Delimiter is comma
+        description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu` Delimiter is comma
       abi:
         required: false
         type: string
@@ -118,24 +118,38 @@ jobs:
           cd ..
           python pytorch/torch/utils/collect_env.py
       - name: Run XPU OP Examples
-        if: contains(inputs.ut, 'op_example') || github.event_name == 'schedule'
+        if: contains(inputs.ut, 'op_regression') || github.event_name == 'schedule'
         run: |
           cd ${{ github.workspace }}
           xpu-smi discovery
           source .github/scripts/env.sh
           source activate xpu_op_${ZE_AFFINITY_MASK}
           cd ${{ github.workspace }}
-          cd examples
+          cd test/regressions
           pip install pytest
-          timeout 8000 pytest -v
+          timeout 8000 pytest -v
+      - name: Run XPU OP Regressions test on device 1
+        if: contains(inputs.ut, 'op_regression_dev1') || github.event_name == 'schedule'
+        run: |
+          cd ${{ github.workspace }}
+          xpu-smi discovery
+          source .github/scripts/env.sh
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          export ZE_AFFINITY_MASK_OLD=${ZE_AFFINITY_MASK}
+          unset ZE_AFFINITY_MASK
+          cd ${{ github.workspace }}
+          cd test/regressions
+          pip install pytest
+          timeout 8000 pytest -v test_operation_on_device_1.py
+          export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK_OLD}
       - name: Run XPU OP Extended UT
         if: contains(inputs.ut, 'op_extended') || github.event_name == 'schedule'
         run: |
           source .github/scripts/env.sh
           source activate xpu_op_${ZE_AFFINITY_MASK}
           export PYTORCH_TEST_WITH_SLOW=1
           cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
-          timeout 10000 python run_test_with_skip.py
+          timeout 10000 python run_test_with_skip.py
       - name: Run XPU OP UT
         if: contains(inputs.ut, 'op_ut') || github.event_name == 'schedule'
         run: |
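A note on the device-1 step above: the other UT steps pin the run to a
single GPU via ZE_AFFINITY_MASK, so that step temporarily unsets the mask
to let the Level Zero / SYCL runtime enumerate every GPU and make `xpu:1`
reachable, then restores it. A minimal stand-alone sketch of how the mask
affects enumeration (hypothetical check program, not part of this commit;
compile with a SYCL compiler such as `icpx -fsycl`):

    #include <sycl/sycl.hpp>
    #include <iostream>

    // Prints the GPUs visible to SYCL. With ZE_AFFINITY_MASK=0 exported,
    // only device 0 should be listed; with the mask unset, all GPUs appear.
    int main() {
      auto gpus = sycl::device::get_devices(sycl::info::device_type::gpu);
      std::cout << gpus.size() << " visible GPU device(s)\n";
      for (auto& d : gpus)
        std::cout << "  " << d.get_info<sycl::info::device::name>() << "\n";
      return 0;
    }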
14 changes: 7 additions & 7 deletions .github/workflows/nightly_ondemand.yml
@@ -22,7 +22,7 @@ on:
         required: false
         type: string
         default: 'torch_xpu'
-        description: UT scope. `op_example,op_extended,op_ut,torch_xpu`. Delimiter is comma
+        description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu`. Delimiter is comma
       triton:
         required: false
         type: string
@@ -69,20 +69,20 @@ jobs:
   Linux-Nightly-Ondemand-UT-Tests:
     if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
     uses: ./.github/workflows/_linux_ut.yml
-    with:
+    with:
       keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
       triton: ${{ github.event_name == 'schedule' && '' || inputs.triton }}
       runner: linux.idc.xpu

   Linux-Weekly-UT-Tests-ABI-0:
     if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
     uses: ./.github/workflows/_linux_ut.yml
-    with:
+    with:
       abi: 0
-      ut: op_example,op_extended,op_ut,torch_xpu
+      ut: op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu
       runner: linux.idc.xpu

   Linux-Nightly-Ondemand-E2E-Tests:
@@ -93,7 +93,7 @@ jobs:
     env:
       pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
       keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
     outputs:
       TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
14 changes: 7 additions & 7 deletions .github/workflows/nightly_ondemand_rolling.yml
@@ -22,7 +22,7 @@ on:
         required: false
         type: string
         default: 'torch_xpu'
-        description: UT scope. `op_example,op_extended,op_ut,torch_xpu`. Delimiter is comma
+        description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu`. Delimiter is comma
       triton:
         required: false
         type: string
@@ -69,19 +69,19 @@ jobs:
   Linux-Nightly-Ondemand-UT-Tests-Rolling:
     if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
     uses: ./.github/workflows/_linux_ut.yml
-    with:
+    with:
       keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
       runner: pvc_rolling

   Linux-Weekly-UT-Tests-ABI-0-Rolling:
     if: github.event_name == 'schedule' && github.event.schedule == '30 16 * * 5'
     uses: ./.github/workflows/_linux_ut.yml
-    with:
+    with:
       abi: 0
-      ut: op_example,op_extended,op_ut,torch_xpu
+      ut: op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu
       runner: pvc_rolling

   Linux-Nightly-Ondemand-E2E-Tests-Rolling:
@@ -92,7 +92,7 @@ jobs:
     env:
       pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
       keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
     outputs:
       TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
8 changes: 4 additions & 4 deletions .github/workflows/nightly_ondemand_whl.yml
@@ -17,7 +17,7 @@ on:
         required: false
         type: string
         default: 'torch_xpu'
-        description: UT scope. `op_example,op_extended,op_ut,torch_xpu`. Delimiter is comma
+        description: UT scope. `op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu`. Delimiter is comma
       suite:
         required: true
         type: string
@@ -59,8 +59,8 @@ jobs:
   Linux-Nightly-Ondemand-UT-WHL-Tests:
     if: github.event_name == 'schedule' || ${{ inputs.ut_suite }}
     uses: ./.github/workflows/_linux_ut.yml
-    with:
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+    with:
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
       pytorch: nightly_wheel
       runner: linux.idc.xpu
@@ -72,7 +72,7 @@ jobs:
     timeout-minutes: 3600
     env:
       pytorch: ${{ github.event_name == 'schedule' && 'nightly' || inputs.pytorch }}
-      ut: ${{ github.event_name == 'schedule' && 'op_example,op_extended,op_ut,torch_xpu' || inputs.ut }}
+      ut: ${{ github.event_name == 'schedule' && 'op_regression,op_regression_dev1,op_extended,op_ut,torch_xpu' || inputs.ut }}
       python: ${{ github.event_name == 'schedule' && '3.10' || inputs.python }}
     outputs:
       TORCH_BRANCH_ID: ${{ steps.pinned.outputs.TORCH_BRANCH_ID }}
6 changes: 3 additions & 3 deletions .github/workflows/pull.yml
@@ -23,15 +23,15 @@ jobs:
     # Don't run on forked repos and draft PRs
     if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
     uses: ./.github/workflows/_linux_ut.yml
-    with:
-      ut: op_example,op_extended,op_ut
+    with:
+      ut: op_regression,op_regression_dev1,op_extended,op_ut
       runner: linux.idc.xpu

   preci-ut-abi-0:
     # Don't run on forked repos and draft PRs
     if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
     uses: ./.github/workflows/_linux_ut.yml
-    with:
+    with:
       abi: 0
       ut: op_extended
       runner: linux.idc.xpu
9 changes: 7 additions & 2 deletions src/comm/DeviceProperties.h
@@ -16,8 +16,13 @@ static int64_t syclMaxWorkGroupSize(
   auto dev = q.get_device();

   auto kid = ::sycl::get_kernel_id<KernelClass>();
-  auto kbundle =
-      ::sycl::get_kernel_bundle<::sycl::bundle_state::executable>(ctx, {kid});
+  // The kernel won't be built for any device except the first one.
+  // Launching the kernel on the other devices will raise a runtime
+  // error. As an alternative, temporary solution, we give the SYCL
+  // runtime an extra hint by passing the device explicitly.
+  // https://github.com/intel/llvm/issues/15127
+  auto kbundle = ::sycl::get_kernel_bundle<::sycl::bundle_state::executable>(
+      ctx, {dev}, {kid});

   ::sycl::kernel k = kbundle.get_kernel(kid);
   return k.get_info<::sycl::info::kernel_device_specific::work_group_size>(dev);
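Why the extra device argument helps: on a multi-GPU system the shared
context may hold an executable binary only for the first device, so a
kernel launched on the second device fails at submit time. A hedged
reproducer sketch (hypothetical kernel name `Dummy`; exact behavior
depends on the DPC++ version, see intel/llvm#15127):

    #include <sycl/sycl.hpp>

    class Dummy;  // hypothetical kernel name for illustration

    int main() {
      auto gpus = sycl::device::get_devices(sycl::info::device_type::gpu);
      if (gpus.size() < 2) return 0;  // needs two GPUs to reproduce
      sycl::context ctx{gpus};        // one context over all GPUs
      sycl::queue q{ctx, gpus[1]};    // queue on the second device
      auto kid = sycl::get_kernel_id<Dummy>();
      // Without the device list, the bundle may contain a binary built only
      // for gpus[0]; passing {gpus[1]} hints the runtime to build for it too.
      auto kb = sycl::get_kernel_bundle<sycl::bundle_state::executable>(
          ctx, {gpus[1]}, {kid});
      q.single_task<Dummy>([] {}).wait();  // launch on gpus[1] succeeds
      return 0;
    }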
12 files renamed without changes.
10 changes: 10 additions & 0 deletions test/regressions/test_operation_on_device_1.py
@@ -0,0 +1,10 @@
+import torch
+from torch.testing._internal.common_utils import TestCase
+
+class TestOperationOnDevice1(TestCase):
+    def test_sum_on_device1(self, dtype=torch.float):
+        if torch.xpu.device_count() >= 2:
+            a = torch.randn(2, 3, device=torch.device('xpu:1'))
+            res = a.sum()
+            ref = a.cpu().sum()
+            self.assertEqual(ref, res)
7 files renamed without changes.
2 changes: 1 addition & 1 deletion test/xpu/xpu_test_utils.py
@@ -339,7 +339,7 @@
         }
     },
     "histogram": {
-        ("TestCommonXPU", "test_out_histogram_xpu_float32"):{
+        ("TestCommonXPU", "test_out"):{
             torch.float32: tol(atol=3e-5, rtol=5e-5),
         }
     }
