diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index f3f2ba57b6a5e..57e64e08d1006 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -13,11 +13,12 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Options.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Option/Option.h" +#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" -#include "llvm/SYCLLowerIR/DeviceConfigFile.hpp" #include #include @@ -305,6 +306,11 @@ bool SYCL::shouldDoPerObjectFileLinking(const Compilation &C) { // Return whether to use native bfloat16 library. static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, bool &UseNative) { + // -fsycl-targets values treated as supporting native bfloat16 conversion. + static const llvm::SmallSet<StringRef, 8> GPUArchsWithNBF16{ + "intel_gpu_pvc", "intel_gpu_acm_g10", "intel_gpu_acm_g11", + "intel_gpu_acm_g12", "intel_gpu_dg2_g10", "intel_gpu_dg2_g11", + "intel_gpu_dg2_g12", "intel_gpu_bmg_g21"}; const llvm::opt::ArgList &Args = C.getArgs(); bool NeedLibs = false; @@ -330,32 +336,75 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, } } - UseNative = false; - - // Check for intel_gpu_pvc as the target - if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { - if (SYCLTarget->getValues().size() == 1) { - StringRef SYCLTargetStr = SYCLTarget->getValue(); - if (SYCLTargetStr == "intel_gpu_pvc") - UseNative = true; - } - } - - auto checkBF = [](StringRef Device) { - return Device.starts_with("pvc") || Device.starts_with("ats"); - }; + // We need to select fallback/native bfloat16 devicelib in AOT compilation + // targeting Intel GPU devices. Users have 3 ways to apply AOT: + // 1). clang++ -fsycl -fsycl-targets=spir64_gen -Xs "-device pvc,...," + // 2). clang++ -fsycl -fsycl-targets=intel_gpu_pvc,... + // 3). 
clang++ -fsycl -fsycl-targets=spir64_gen,intel_gpu_pvc,... + // -Xsycl-target-backend=spir64_gen "-device dg2" std::string Params; for (const auto &Arg : TargArgs) { Params += " "; Params += Arg; } + + auto checkBF = [](StringRef Device) { + return Device.starts_with("pvc") || Device.starts_with("ats") || + Device.starts_with("dg2") || Device.starts_with("bmg"); + }; + + auto checkSpirvJIT = [](StringRef Target) { + return Target.starts_with("spir64-") || Target.starts_with("spirv64-") || + (Target == "spir64") || (Target == "spirv64"); + }; + size_t DevicesPos = Params.find("-device "); - if (!UseNative && DevicesPos != std::string::npos) { + // "-device xxx" is used to specify AOT target device, so user must apply + // -Xs "-device xxx" or -Xsycl-target-backend=spir64_gen "-device xxx" + if (DevicesPos != std::string::npos) { UseNative = true; std::istringstream Devices(Params.substr(DevicesPos + 8)); for (std::string S; std::getline(Devices, S, ',');) UseNative &= checkBF(S); + + // When "-device XXX" is applied to specify GPU type, user can still + // add -fsycl-targets=intel_gpu_pvc..., native bfloat16 devicelib can + // only be linked when all specified GPU types support it. + // We need to filter out CPU and FPGA targets here and only focus on GPU + // devices. + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + for (auto TargetsV : SYCLTarget->getValues()) { + if (!checkSpirvJIT(StringRef(TargetsV)) && + !StringRef(TargetsV).starts_with("spir64_gen") && + !StringRef(TargetsV).starts_with("spir64_x86_64") && + !StringRef(TargetsV).starts_with("spir64_fpga") && + !GPUArchsWithNBF16.contains(StringRef(TargetsV))) { + UseNative = false; + break; + } + } + } + + return NeedLibs; + + } else { + // -fsycl-targets=intel_gpu_xxx is used to specify AOT target device. + // Multiple Intel GPU devices can be specified, native bfloat16 devicelib + // can be involved only when all GPU devices specified support native + // bfloat16 conversion. 
+ UseNative = true; + + if (Arg *SYCLTarget = Args.getLastArg(options::OPT_fsycl_targets_EQ)) { + for (auto TargetsV : SYCLTarget->getValues()) { + if (!checkSpirvJIT(StringRef(TargetsV)) && + !GPUArchsWithNBF16.contains(StringRef(TargetsV))) { + UseNative = false; + break; + } + } + } + return NeedLibs; } } return NeedLibs; @@ -511,7 +560,8 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, } if (TargetTriple.isNVPTX() && IgnoreSingleLibs) - LibraryList.push_back(Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc")); + LibraryList.push_back( + Args.MakeArgString("devicelib-nvptx64-nvidia-cuda.bc")); if (TargetTriple.isAMDGCN() && IgnoreSingleLibs) LibraryList.push_back(Args.MakeArgString("devicelib-amdgcn-amd-amdhsa.bc")); diff --git a/clang/test/Driver/sycl-device-lib-bfloat16.cpp b/clang/test/Driver/sycl-device-lib-bfloat16.cpp index 097f1754ba3c5..43993fbe7c021 100644 --- a/clang/test/Driver/sycl-device-lib-bfloat16.cpp +++ b/clang/test/Driver/sycl-device-lib-bfloat16.cpp @@ -68,6 +68,95 @@ // RUN: --sysroot=%S/Inputs/SYCL -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK + +// Test AOT-DG2 compilation uses native libs. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE + +// Test AOT-PVC + AOT-DG2 compilation uses native libs + native libs. +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE + +// Test AOT-DG1 + AOT-DG2 compilation uses fallback libs + fallback libs. 
+// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK + + +// Test AOT-PVC + JIT compilation uses native libs + no libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE + +// Test AOT-DG1 + JIT compilation uses fallback libs + no libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64-unknown-unknown \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spirv64-unknown-unknown \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE + +// Test AOT-PVC + JIT compilation + AOT-DG2 uses native libs + no libs + native libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: 
| FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_acm_g10 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NONE-NATIVE + +// Test AOT-PVC + JIT compilation + AOT-DG1 uses fallback libs + no libs + fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64,intel_gpu_dg1 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64,intel_gpu_dg1 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64-unknown-unknown,intel_gpu_dg1 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spirv64-unknown-unknown,intel_gpu_dg1 \ +// RUN: --sysroot=%S/Inputs/SYCL %s -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-NONE-FALLBACK + +// Test AOT-PVC + AOT-DG1 specified via different options, uses fallback libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \ +// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device dg1" \ +// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_dg1,spir64_gen \ +// RUN: 
--sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \ +// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-FALLBACK-FALLBACK + +// Test AOT-PVC + AOT-BMG specified via different options, uses native libs +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_bmg_g21,spir64_gen \ +// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device pvc" \ +// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE +// RUN: %clangxx -fsycl -fsycl-targets=intel_gpu_pvc,spir64_gen \ +// RUN: --sysroot=%S/Inputs/SYCL -Xsycl-target-backend=spir64_gen "-device bmg-g21-a0" \ +// RUN: %s -### 2>&1 | FileCheck %s -check-prefix=BFLOAT16-NATIVE-NATIVE + // BFLOAT16-NOT: llvm-link{{.*}} "{{.*}}libsycl-{{fallback|native}}-bfloat16.bc" // BFLOAT16-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" @@ -85,3 +174,20 @@ // BFLOAT16-FALLBACK-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc" // BFLOAT16-FALLBACK-FALLBACK: "{{.*}}libsycl-fallback-bfloat16.bc" + +// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" +// BFLOAT16-NATIVE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" + +// BFLOAT16-NATIVE-NONE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" +// BFLOAT16-NATIVE-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc" + +// BFLOAT16-FALLBACK-NONE: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc" +// BFLOAT16-FALLBACK-NONE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc" + +// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" +// BFLOAT16-NATIVE-NONE-NATIVE-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc" +// BFLOAT16-NATIVE-NONE-NATIVE: llvm-link{{.*}} "{{.*}}libsycl-native-bfloat16.bc" + +// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc" +// BFLOAT16-FALLBACK-NONE-FALLBACK-NOT: llvm-link{{.*}} "{{.*}}-bfloat16.bc" +// BFLOAT16-FALLBACK-NONE-FALLBACK: llvm-link{{.*}} "{{.*}}libsycl-fallback-bfloat16.bc"