diff --git a/README.md b/README.md index 7a407c9ab..6f3553156 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ CHIP_DEVICE= # If there are multiple devices CHIP_DEVICE_TYPE= or empty # Selects which type of device to use. Defaults to empty. CHIP_LOGLEVEL= # Sets the log level. If compiled in RELEASE, only err/crit are available CHIP_DUMP_SPIRV= # Dumps the generated SPIR-V code to a file -CHIP_JIT_FLAGS_OVERRIDE= # String to override the default JIT flags. Defaults to -cl-kernel-arg-info -cl-std=CL3.0 +CHIP_JIT_FLAGS= # Additional JIT flags CHIP_L0_COLLECT_EVENTS_TIMEOUT= # Timeout in seconds for collecting Level Zero events CHIP_L0_EVENT_TIMEOUT= # If enabled, skips the uninitialization of chipStar's backend objects at program termination diff --git a/scripts/unit_tests.sh b/scripts/unit_tests.sh index 3ee50f528..2f8454b87 100755 --- a/scripts/unit_tests.sh +++ b/scripts/unit_tests.sh @@ -2,6 +2,7 @@ set -e +export CHIP_MODULE_CACHE_DIR="" host=`hostname` echo "Running on ${host}" # If not on Salami read the file /opt/actions-runner/num-threads.txt and set the number of threads to the value in the file diff --git a/src/CHIPDriver.hh b/src/CHIPDriver.hh index 84bc0353d..21c0aa739 100644 --- a/src/CHIPDriver.hh +++ b/src/CHIPDriver.hh @@ -232,7 +232,8 @@ private: bool DumpSpirv_ = false; bool SkipUninit_ = false; bool LazyJit_ = true; - std::string JitFlags_ = CHIP_DEFAULT_JIT_FLAGS; + std::string JitFlags_ = ""; + std::string JitFlagsOverride_ = ""; unsigned long L0EventTimeout_ = 0; int L0CollectEventsTimeout_ = 0; bool OCLDisableQueueProfiling_ = false; @@ -253,6 +254,8 @@ public: bool getDumpSpirv() const { return DumpSpirv_; } bool getSkipUninit() const { return SkipUninit_; } const std::string &getJitFlags() const { return JitFlags_; } + const std::string &getJitFlagsOverride() const { return JitFlagsOverride_; } + bool hasJitOverride() const { return !JitFlagsOverride_.empty(); } bool getLazyJit() const { return LazyJit_; } int getL0CollectEventsTimeout() const { return L0CollectEventsTimeout_; } unsigned long getL0EventTimeout() const { @@ -287,7 +290,9 @@ private: LazyJit_ = readEnvVar("CHIP_LAZY_JIT", value) ? parseBoolean(value) : LazyJit_; JitFlags_ = - readEnvVar("CHIP_JIT_FLAGS_OVERRIDE", value, false) ? value : JitFlags_; + readEnvVar("CHIP_JIT_FLAGS", value, false) ? value : JitFlags_; + JitFlagsOverride_ = + readEnvVar("CHIP_JIT_FLAGS_OVERRIDE", value, false) ? value : JitFlagsOverride_; L0CollectEventsTimeout_ = readEnvVar("CHIP_L0_COLLECT_EVENTS_TIMEOUT", value) ? parseInt(value) @@ -343,7 +348,7 @@ private: logInfo("CHIP_DEVICE={}", DeviceIdx_); logInfo("CHIP_BE={}", Backend_.str()); logInfo("CHIP_DUMP_SPIRV={}", DumpSpirv_ ? "on" : "off"); - logInfo("CHIP_JIT_FLAGS_OVERRIDE={}", JitFlags_); + logInfo("CHIP_JIT_FLAGS_OVERRIDE={}", JitFlagsOverride_); logInfo("CHIP_L0_COLLECT_EVENTS_TIMEOUT={}", L0CollectEventsTimeout_); logInfo("CHIP_L0_EVENT_TIMEOUT={}", L0EventTimeout_); logInfo("CHIP_SKIP_UNINIT={}", SkipUninit_ ? "on" : "off"); diff --git a/src/backend/Level0/CHIPBackendLevel0.cc b/src/backend/Level0/CHIPBackendLevel0.cc index b39994795..26a2afe6b 100644 --- a/src/backend/Level0/CHIPBackendLevel0.cc +++ b/src/backend/Level0/CHIPBackendLevel0.cc @@ -1650,10 +1650,7 @@ void CHIPBackendLevel0::uninitialize() { return; } -std::string CHIPBackendLevel0::getDefaultJitFlags() { - return std::string( - "-cl-std=CL2.0 -cl-take-global-address -cl-match-sincospi"); -} +std::string CHIPBackendLevel0::getDefaultJitFlags() { return std::string(""); } void CHIPBackendLevel0::initializeCommon(ze_driver_handle_t ZeDriver) { uint32_t ExtCount = 0; @@ -2558,7 +2555,11 @@ void CHIPModuleLevel0::compile(chipstar::Device *ChipDev) { std::vector ILSizes(1, SPIRVBin.size()); std::vector ILInputs( 1, reinterpret_cast(SPIRVBin.data())); - std::vector BuildFlags(1, ChipEnvVars.getJitFlags().c_str()); + auto Flags = ChipEnvVars.hasJitOverride() ? ChipEnvVars.getJitFlagsOverride() + : ChipEnvVars.getJitFlags() + " " + + Backend->getDefaultJitFlags(); + logInfo("JIT flags: {}", Flags); + std::vector BuildFlags(1, Flags.c_str()); appendDeviceLibrarySources(ILSizes, ILInputs, BuildFlags, LzDev->getFpAtomicProps()); diff --git a/src/backend/OpenCL/CHIPBackendOpenCL.cc b/src/backend/OpenCL/CHIPBackendOpenCL.cc index aaa2cb969..57e78fc8b 100644 --- a/src/backend/OpenCL/CHIPBackendOpenCL.cc +++ b/src/backend/OpenCL/CHIPBackendOpenCL.cc @@ -863,7 +863,11 @@ static cl::Program compileIL(cl::Context Ctx, CHIPDeviceOpenCL &ChipDev, cl_device_id DevId = ChipDev.get()->get(); auto Start = std::chrono::high_resolution_clock::now(); - Err = clCompileProgram(Prog.get(), 1, &DevId, Options.c_str(), 0, nullptr, + auto Flags = ChipEnvVars.hasJitOverride() ? ChipEnvVars.getJitFlagsOverride() + : ChipEnvVars.getJitFlags() + " " + + Backend->getDefaultJitFlags(); + logInfo("JIT flags: {}", Flags); + Err = clCompileProgram(Prog.get(), 1, &DevId, Flags.c_str(), 0, nullptr, nullptr, nullptr, nullptr); auto End = std::chrono::high_resolution_clock::now(); auto Duration = @@ -1129,7 +1133,8 @@ void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) { int Err; auto SrcBin = Src_->getBinary(); - std::string buildOptions = ChipEnvVars.getJitFlags(); + std::string buildOptions = + Backend->getDefaultJitFlags() + " " + ChipEnvVars.getJitFlags(); std::string binAsStr = std::string(SrcBin.begin(), SrcBin.end()); // Include device name in cache key @@ -1150,7 +1155,24 @@ void CHIPModuleOpenCL::compile(chipstar::Device *ChipDev) { appendRuntimeObjects(*ChipCtxOcl->get(), *ChipDevOcl, ClObjects); auto linkStart = std::chrono::high_resolution_clock::now(); - Program_ = cl::linkProgram(ClObjects, nullptr, nullptr, nullptr, &Err); + + std::string Flags = ""; + // Check if running on Intel GPU OpenCL driver + std::string vendor = ChipDevOcl->get()->getInfo(); + bool isIntelGPU = + (vendor.find("Intel") != std::string::npos) && + (ChipDevOcl->get()->getInfo() & CL_DEVICE_TYPE_GPU); + + if (isIntelGPU) { + // Only Intel GPU driver seems to need compile flags at the link step + Flags = ChipEnvVars.hasJitOverride() ? ChipEnvVars.getJitFlagsOverride() + : ChipEnvVars.getJitFlags() + " " + + Backend->getDefaultJitFlags(); + } + + logInfo("JIT Link flags: {}", Flags); + Program_ = + cl::linkProgram(ClObjects, Flags.c_str(), nullptr, nullptr, &Err); auto linkEnd = std::chrono::high_resolution_clock::now(); auto linkDuration = std::chrono::duration_cast( linkEnd - linkStart); diff --git a/tests/runtime/TestEnvVars.hip b/tests/runtime/TestEnvVars.hip index b5b91737d..e63ace2c8 100644 --- a/tests/runtime/TestEnvVars.hip +++ b/tests/runtime/TestEnvVars.hip @@ -56,7 +56,8 @@ void testEnvironmentVariableParsing() { setEnvVar("CHIP_DUMP_SPIRV", "1"); setEnvVar("CHIP_SKIP_UNINIT", "on"); setEnvVar("CHIP_LAZY_JIT", "1"); - setEnvVar("CHIP_JIT_FLAGS_OVERRIDE", "-O2"); + setEnvVar("CHIP_JIT_FLAGS", ""); + setEnvVar("CHIP_JIT_FLAGS_OVERRIDE", ""); setEnvVar("CHIP_L0_COLLECT_EVENTS_TIMEOUT", "1000"); setEnvVar("CHIP_L0_EVENT_TIMEOUT", "2000"); setEnvVar("CHIP_OCL_DISABLE_QUEUE_PROFILING", "1"); @@ -73,7 +74,8 @@ void testEnvironmentVariableParsing() { assertEqual(true, envVars.getDumpSpirv(), "CHIP_DUMP_SPIRV"); assertEqual(true, envVars.getSkipUninit(), "CHIP_SKIP_UNINIT"); assertEqual(true, envVars.getLazyJit(), "CHIP_LAZY_JIT"); - assertEqual(std::string("-O2"), envVars.getJitFlags(), "CHIP_JIT_FLAGS_OVERRIDE"); + assertEqual(std::string(""), envVars.getJitFlags(), "CHIP_JIT_FLAGS"); + assertEqual(std::string(""), envVars.getJitFlagsOverride(), "CHIP_JIT_FLAGS_OVERRIDE"); assertEqual(1000, envVars.getL0CollectEventsTimeout(), "CHIP_L0_COLLECT_EVENTS_TIMEOUT"); assertEqual(static_cast(2000), envVars.getL0EventTimeout(), "CHIP_L0_EVENT_TIMEOUT"); assertEqual(true, envVars.getOCLDisableQueueProfiling(), "CHIP_OCL_DISABLE_QUEUE_PROFILING"); @@ -87,6 +89,7 @@ void testEnvironmentVariableParsing() { unsetEnvVar("CHIP_DUMP_SPIRV"); unsetEnvVar("CHIP_SKIP_UNINIT"); unsetEnvVar("CHIP_LAZY_JIT"); + unsetEnvVar("CHIP_JIT_FLAGS"); unsetEnvVar("CHIP_JIT_FLAGS_OVERRIDE"); unsetEnvVar("CHIP_L0_COLLECT_EVENTS_TIMEOUT"); unsetEnvVar("CHIP_L0_EVENT_TIMEOUT"); @@ -108,7 +111,8 @@ void testDefaultValues() { assertEqual(false, envVars.getDumpSpirv(), "Default CHIP_DUMP_SPIRV"); assertEqual(false, envVars.getSkipUninit(), "Default CHIP_SKIP_UNINIT"); assertEqual(true, envVars.getLazyJit(), "Default CHIP_LAZY_JIT"); - assertEqual(std::string(CHIP_DEFAULT_JIT_FLAGS), envVars.getJitFlags(), "Default CHIP_JIT_FLAGS_OVERRIDE"); + assertEqual(std::string(""), envVars.getJitFlags(), "Default CHIP_JIT_FLAGS"); + assertEqual(std::string(""), envVars.getJitFlagsOverride(), "Default CHIP_JIT_FLAGS_OVERRIDE"); assertEqual(0, envVars.getL0CollectEventsTimeout(), "Default CHIP_L0_COLLECT_EVENTS_TIMEOUT"); assertEqual(UINT64_MAX, envVars.getL0EventTimeout(), "Default CHIP_L0_EVENT_TIMEOUT"); assertEqual(false, envVars.getOCLDisableQueueProfiling(), "Default CHIP_OCL_DISABLE_QUEUE_PROFILING");