Skip to content

Commit

Permalink
[SYCL][Driver] Support bfloat16 devicelib selection when multiple AOT…
Browse files Browse the repository at this point in the history
… targets specified (#16494)

Users can specify multiple AOT targets when building a SYCL program in
the following ways:
1). via -fsycl-targets=intel_gpu_pvc,intel_gpu_acm_g10,....
2). via -fsycl-targets=spir64_gen ... -Xs "-device pvc,dg2...."
3). via -fsycl-targets=spir64_gen..., -Xsycl-target-backend=spir64_gen
"-device pvc"
We should select native bfloat16 devicelib when all AOT targets
specified support native bfloat16 conversion. Currently, pvc, dg2, bmg
devices support native bfloat16.
If the user specifies a JIT target together with AOT targets that all support
native bfloat16 conversion, we still select the native bfloat16 devicelib,
since the bfloat16 devicelib is skipped in the linking step for JIT targets.

---------

Signed-off-by: jinge90 <[email protected]>
Co-authored-by: Michael Toguchi <[email protected]>
  • Loading branch information
jinge90 and mdtoguchi authored Jan 31, 2025
1 parent 51cfcfa commit 71ca51f
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 17 deletions.
84 changes: 67 additions & 17 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Option/Option.h"
#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp"
#include <algorithm>
#include <sstream>

Expand Down Expand Up @@ -305,6 +306,11 @@ bool SYCL::shouldDoPerObjectFileLinking(const Compilation &C) {
// Return whether to use native bfloat16 library.
static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
bool &UseNative) {

// Intel GPU AOT target names, as spelled in -fsycl-targets=, that are treated
// as supporting native bfloat16 conversion (PVC, DG2/ACM and BMG). Lookups are
// exact string matches, so every entry must be a valid target spelling: the
// DG2 aliases are "intel_gpu_dg2_g10/g11/g12", mirroring the
// "intel_gpu_acm_g10/g11/g12" entries. A misspelled entry can never match and
// would silently force the fallback devicelib for that target.
static llvm::SmallSet<StringRef, 8> GPUArchsWithNBF16{
    "intel_gpu_pvc",     "intel_gpu_acm_g10", "intel_gpu_acm_g11",
    "intel_gpu_acm_g12", "intel_gpu_dg2_g10", "intel_gpu_dg2_g11",
    "intel_gpu_dg2_g12", "intel_gpu_bmg_g21"};
const llvm::opt::ArgList &Args = C.getArgs();
bool NeedLibs = false;

Expand All @@ -330,32 +336,75 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,
}
}

UseNative = false;

// Check for intel_gpu_pvc as the target
if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
if (SYCLTarget->getValues().size() == 1) {
StringRef SYCLTargetStr = SYCLTarget->getValue();
if (SYCLTargetStr == "intel_gpu_pvc")
UseNative = true;
}
}

auto checkBF = [](StringRef Device) {
return Device.starts_with("pvc") || Device.starts_with("ats");
};
// We need to select fallback/native bfloat16 devicelib in AOT compilation
// targeting Intel GPU devices. Users have 3 ways to apply AOT,
// 1). clang++ -fsycl -fsycl-targets=spir64_gen -Xs "-device pvc,...,"
// 2). clang++ -fsycl -fsycl-targets=intel_gpu_pvc,...
// 3). clang++ -fsycl -fsycl-targets=spir64_gen,intel_gpu_pvc,...
// -Xsycl-target-backend=spir64_gen "-device dg2"

std::string Params;
for (const auto &Arg : TargArgs) {
Params += " ";
Params += Arg;
}

auto checkBF = [](StringRef Device) {
return Device.starts_with("pvc") || Device.starts_with("ats") ||
Device.starts_with("dg2") || Device.starts_with("bmg");
};

auto checkSpirvJIT = [](StringRef Target) {
return Target.starts_with("spir64-") || Target.starts_with("spirv64-") ||
(Target == "spir64") || (Target == "spirv64");
};

size_t DevicesPos = Params.find("-device ");
if (!UseNative && DevicesPos != std::string::npos) {
// "-device xxx" is used to specify AOT target device, so user must apply
// -Xs "-device xxx" or -Xsycl-target-backend=spir64_gen "-device xxx"
if (DevicesPos != std::string::npos) {
UseNative = true;
std::istringstream Devices(Params.substr(DevicesPos + 8));
for (std::string S; std::getline(Devices, S, ',');)
UseNative &= checkBF(S);

// When "-device XXX" is applied to specify GPU type, user can still
// add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can
// only be linked when all GPU types specified support it.
// We need to filter out CPU and FPGA targets here and only focus on GPU
// devices.
if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
for (auto TargetsV : SYCLTarget->getValues()) {
if (!checkSpirvJIT(StringRef(TargetsV)) &&
!StringRef(TargetsV).starts_with("spir64_gen") &&
!StringRef(TargetsV).starts_with("spir64_x86_64") &&
!StringRef(TargetsV).starts_with("spir64_fpga") &&
!GPUArchsWithNBF16.contains(StringRef(TargetsV))) {
UseNative = false;
break;
}
}
}

return NeedLibs;

} else {
// -fsycl-targets=intel_gpu_xxx is used to specify AOT target device.
// Multiple Intel GPU devices can be specified, native bfloat16 devicelib
// can be involved only when all GPU devices specified support native
// bfloat16 conversion.
UseNative = true;

if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) {
for (auto TargetsV : SYCLTarget->getValues()) {
if (!checkSpirvJIT(StringRef(TargetsV)) &&
!GPUArchsWithNBF16.contains(StringRef(TargetsV))) {
UseNative = false;
break;
}
}
}
return NeedLibs;
}
}
return NeedLibs;
Expand Down Expand Up @@ -511,7 +560,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
}

if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));
LibraryList.push_back(
Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc"));

if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc"));
Expand Down
106 changes: 106 additions & 0 deletions clang/test/Driver/sycl-device-lib-bfloat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,95 @@
// RUN: --sysroot=%S/Inputs/SYCL -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK


// Test AOT-DG2 compilation uses native libs + native libs.
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE

// Test AOT-PVC + AOT-DG2 compilation uses native libs + native libs.
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE

// Test AOT-DG1 + AOT-DG2 compilation uses fallback libs + fallback libs.
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK


// Test AOT-PVC + JIT compilation uses native libs + no libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE

// Test AOT-DG1 + JIT compilation uses fallback libs + no libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64-unknown-unknown \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64-unknown-unknown \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE

// Test AOT-PVC + JIT compilation + AOT-DG2 uses native libs + no libs + native libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_acm_g10 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE

// Test AOT-PVC + JIT compilation + AOT-DG1 uses fallback libs + no libs + fallback libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_dg1 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_dg1 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_dg1 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_dg1 \
// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \
// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK

// Test AOT-PVC + AOT-DG1 specified via different options, uses fallback libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device dg1" \
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK

// Test AOT-PVC + AOT-BMG specified via different options, uses native libs
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21,spir64_gen \
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE
// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \
// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device bmg-g21-a0" \
// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE

// BFLOAT16-NOT: llvm-link{{.*}} "{{.*}}libsycl-{{fallback|native}}-bfloat16.bc"

// BFLOAT16-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
Expand All @@ -85,3 +174,20 @@

// BFLOAT16-FALLBACK-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
// BFLOAT16-FALLBACK-FALLBACK: "{{.*}}libsycl-fallback-bfloat16.bc"

// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"

// BFLOAT16-NATIVE-NONE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
// BFLOAT16-NATIVE-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"

// BFLOAT16-FALLBACK-NONE: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
// BFLOAT16-FALLBACK-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"

// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"
// BFLOAT16-NATIVE-NONE-NATIVE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc"

// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"
// BFLOAT16-FALLBACK-NONE-FALLBACK-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc"
// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"

0 comments on commit 71ca51f

Please sign in to comment.