From 998d03b9d316e7f298afc1f19ed4bc72337ee539 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 10:15:06 +0200 Subject: [PATCH 01/10] Tweak the way large cfuncs/taylor integrators are split in multiple LLVM modules. --- src/expression_cfunc.cpp | 27 ++++++++++----------------- src/taylor_02.cpp | 35 +++++++++++++++++------------------ 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/expression_cfunc.cpp b/src/expression_cfunc.cpp index 20b128001..e1875233d 100644 --- a/src/expression_cfunc.cpp +++ b/src/expression_cfunc.cpp @@ -1697,21 +1697,14 @@ void multi_cfunc_evaluate_segments(llvm::Type *main_fp_t, std::list cur_state->builder().SetInsertPoint( llvm::BasicBlock::Create(cur_state->context(), "entry", make_driver_proto(*cur_state, cur_state_idx))); - // Variable to keep track of how many blocks have been codegenned - // in the current state. - boost::safe_numerics::safe n_cg_blocks = 0; + // Variable to keep track of how many evaluation functions have + // been invoked in the current state. + boost::safe_numerics::safe n_evalf = 0; - // Limit of codegenned blocks per state. + // Limit of function evaluations per state. // NOTE: this has not been really properly tuned, // needs more investigation. - // NOTE: it would probably be better here to keep track of the - // total number of function calls per segment, rather than - // the number of blocks. The reason for this is that each - // function call in principle increases the size of the - // auxiliary global arrays used by the compact mode - // argument generators, which in turn increases the code - // generation time. - constexpr auto max_n_cg_blocks = 20u; + constexpr auto max_n_evalf = 100u; // Variable to keep track of the u variable // on whose definition we are operating. @@ -1719,7 +1712,7 @@ void multi_cfunc_evaluate_segments(llvm::Type *main_fp_t, std::list // Iterate over the segments in s_dc. 
for (const auto &seg : s_dc) { - if (n_cg_blocks > max_n_cg_blocks) { + if (n_evalf > max_n_evalf) { // We have codegenned enough blocks for this state. Create the return // value for the current driver, and move to the next one. cur_state->builder().CreateRetVoid(); @@ -1729,7 +1722,7 @@ void multi_cfunc_evaluate_segments(llvm::Type *main_fp_t, std::list cur_state = &states.back(); // Reset/update the counters. - n_cg_blocks = 0; + n_evalf = 0; ++cur_state_idx; // Add the driver declaration to the main state, and invoke it. @@ -1898,6 +1891,9 @@ void multi_cfunc_evaluate_segments(llvm::Type *main_fp_t, std::list assert(std::ranges::all_of(gens, [](const auto &f) { return static_cast(f); })); // LCOV_EXCL_STOP + // Update the number of invoked evaluation functions. + n_evalf += ncalls; + // We will be manually unrolling loops if ncalls is small enough. // This seems to help with compilation times. constexpr auto max_unroll_n = 5u; @@ -1942,9 +1938,6 @@ void multi_cfunc_evaluate_segments(llvm::Type *main_fp_t, std::list } } - // Update the number of codegenned blocks. - n_cg_blocks += seg_map.size(); - // LCOV_EXCL_START // Update segment_bd if needed. if (is_tracing) { diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index a4ff3e61d..b5d4354d0 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1042,28 +1043,25 @@ std::vector taylor_compute_jet_multi(llvm_state &main_state, llvm::T cur_state->builder().SetInsertPoint(llvm::BasicBlock::Create( cur_state->context(), "entry", taylor_cm_make_driver_proto(*cur_state, cur_state_idx))); - // Variable to keep track of how many blocks have been codegenned - // in the current state. - boost::safe_numerics::safe n_cg_blocks = 0; + // Variable to keep track of how many evaluation functions have + // been invoked in the current state. + boost::safe_numerics::safe n_evalf = 0; - // Limit of codegenned blocks per state. 
+ // Limit of function evaluations per state. // NOTE: this has not been really properly tuned, - // needs more investigation. - // NOTE: it would probably be better here to keep track of the - // total number of function calls per segment, rather than - // the number of blocks. The reason for this is that each - // function call in principle increases the size of the - // auxiliary global arrays used by the compact mode - // argument generators, which in turn increases the code - // generation time. - constexpr auto max_n_cg_blocks = 20u; + // needs more investigation. In any case, this should + // be smaller than the corresponding limit in cfunc + // because here we are typically doing more work per function + // evaluation (as each function evaluation implements + // an AD formula). + constexpr auto max_n_evalf = 20u; // Variable to keep track of the index of the first u variable // in a segment. auto start_u_idx = n_eq; // Helper to finalise the current driver function and create a new one. - auto start_new_driver = [&cur_state, &states, &main_state, &n_cg_blocks, &cur_state_idx, &main_driver_decls]() { + auto start_new_driver = [&cur_state, &states, &main_state, &n_evalf, &cur_state_idx, &main_driver_decls]() { // Finalise the current driver. cur_state->builder().CreateRetVoid(); @@ -1072,7 +1070,7 @@ std::vector taylor_compute_jet_multi(llvm_state &main_state, llvm::T cur_state = &states.back(); // Reset/update the counters. - n_cg_blocks = 0; + n_evalf = 0; ++cur_state_idx; // Add the driver declaration to the main state. @@ -1100,7 +1098,7 @@ std::vector taylor_compute_jet_multi(llvm_state &main_state, llvm::T // of the sv funcs. const auto is_svf_seg = need_svf_lo && max_svf_idx >= start_u_idx && max_svf_idx < (start_u_idx + seg_n_ex); - if (n_cg_blocks > max_n_cg_blocks || is_svf_seg) { + if (n_evalf > max_n_evalf || is_svf_seg) { // Either we have codegenned enough blocks for this state, or we are // in the max_svf_idx state. 
Finalise the current driver and start the new one. start_new_driver(); @@ -1119,8 +1117,9 @@ std::vector taylor_compute_jet_multi(llvm_state &main_state, llvm::T const auto seg_map = taylor_cm_codegen_segment_diff(seg, start_u_idx, *cur_state, fp_t, batch_size, n_uvars, high_accuracy, parallel_mode); - // Update the number of codegenned blocks. - n_cg_blocks += seg_map.size(); + // Update the number of invoked evaluation functions. + n_evalf = std::accumulate(seg_map.begin(), seg_map.end(), n_evalf, + [](auto a, const auto &p) { return a + p.second.first; }); // Update start_u_idx. start_u_idx += seg_n_ex; From aff820fb8eb6f61ee3509efeb8329fc6d8f29299 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 15:35:32 +0200 Subject: [PATCH 02/10] clang on windows attempt. --- .github/workflows/gha_ci.yml | 20 ++++++++++++++++++++ CMakeLists.txt | 2 +- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gha_ci.yml b/.github/workflows/gha_ci.yml index ee5e0f980..78063daa3 100644 --- a/.github/workflows/gha_ci.yml +++ b/.github/workflows/gha_ci.yml @@ -45,6 +45,26 @@ jobs: cmake ../ -G "Visual Studio 17 2022" -A x64 -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DHEYOKA_WITH_SLEEF=yes cmake --build . 
--config Release -j2 copy Release\heyoka.dll test\Release\ + windows_2022_llvm_latest_clang: + runs-on: windows-2022 + steps: + - uses: actions/checkout@v4 + - uses: conda-incubator/setup-miniconda@v3 + with: + auto-update-conda: true + python-version: "3.10" + channels: conda-forge + channel-priority: strict + - uses: ilammy/msvc-dev-cmd@v1 + - name: Build + shell: pwsh + run: | + conda install -y cmake clang ninja llvmdev tbb-devel tbb libboost-devel xtensor xtensor-blas blas blas-devel fmt spdlog sleef zlib libzlib 'mppp=1.*' + mkdir build + cd build + cmake ../ -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DHEYOKA_WITH_SLEEF=yes + cmake --build . -j4 -- -v + copy Release\heyoka.dll test\Release\ conda_release_static: runs-on: ubuntu-latest steps: diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cfdd07bf..aed2ec827 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -517,7 +517,7 @@ if(NOT ${Boost_FOUND}) message(FATAL_ERROR "Could not locate Boost in either CONFIG or MODULE mode.") endif() message(STATUS "Found Boost version ${Boost_VERSION}.") -target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization) +target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization Boost::disable_autolinking Boost::dynamic_linking) # NOTE: quench warnings from Boost when building the library. target_compile_definitions(heyoka PRIVATE BOOST_ALLOW_DEPRECATED_HEADERS) From 95f29e7067858d9bda31af07efc3451b827a611a Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 15:47:47 +0200 Subject: [PATCH 03/10] Minor. 
--- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aed2ec827..838730306 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -517,7 +517,7 @@ if(NOT ${Boost_FOUND}) message(FATAL_ERROR "Could not locate Boost in either CONFIG or MODULE mode.") endif() message(STATUS "Found Boost version ${Boost_VERSION}.") -target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization Boost::disable_autolinking Boost::dynamic_linking) +target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization Boost::disable_autolinking) # NOTE: quench warnings from Boost when building the library. target_compile_definitions(heyoka PRIVATE BOOST_ALLOW_DEPRECATED_HEADERS) From a240214315764cb9053a8e93118a0592f1e8e20e Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 15:48:58 +0200 Subject: [PATCH 04/10] Re-enable tests. --- .github/workflows/gha_ci.yml | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/.github/workflows/gha_ci.yml b/.github/workflows/gha_ci.yml index 78063daa3..3ea2d68ca 100644 --- a/.github/workflows/gha_ci.yml +++ b/.github/workflows/gha_ci.yml @@ -45,26 +45,7 @@ jobs: cmake ../ -G "Visual Studio 17 2022" -A x64 -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DHEYOKA_WITH_SLEEF=yes cmake --build . 
--config Release -j2 copy Release\heyoka.dll test\Release\ - windows_2022_llvm_latest_clang: - runs-on: windows-2022 - steps: - - uses: actions/checkout@v4 - - uses: conda-incubator/setup-miniconda@v3 - with: - auto-update-conda: true - python-version: "3.10" - channels: conda-forge - channel-priority: strict - - uses: ilammy/msvc-dev-cmd@v1 - - name: Build - shell: pwsh - run: | - conda install -y cmake clang ninja llvmdev tbb-devel tbb libboost-devel xtensor xtensor-blas blas blas-devel fmt spdlog sleef zlib libzlib 'mppp=1.*' - mkdir build - cd build - cmake ../ -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DHEYOKA_WITH_SLEEF=yes - cmake --build . -j4 -- -v - copy Release\heyoka.dll test\Release\ + ctest -j4 -V -C Release conda_release_static: runs-on: ubuntu-latest steps: From cfdce125792b7c8ed13d4cc4af46dabfca4b2f29 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 21:50:42 +0200 Subject: [PATCH 05/10] Small tweak. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 838730306..6cfdd07bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -517,7 +517,7 @@ if(NOT ${Boost_FOUND}) message(FATAL_ERROR "Could not locate Boost in either CONFIG or MODULE mode.") endif() message(STATUS "Found Boost version ${Boost_VERSION}.") -target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization Boost::disable_autolinking) +target_link_libraries(heyoka PUBLIC Boost::boost Boost::serialization) # NOTE: quench warnings from Boost when building the library. target_compile_definitions(heyoka PRIVATE BOOST_ALLOW_DEPRECATED_HEADERS) From db35939ad123b7fe0f3419bf39607a5c8ee329ff Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Mon, 2 Sep 2024 21:53:22 +0200 Subject: [PATCH 06/10] Tentative Windows fix. 
--- src/llvm_state.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index da7908e14..52f71cc56 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -309,17 +309,17 @@ llvm::orc::JITTargetMachineBuilder create_jit_tmb(unsigned opt_level, code_model // LCOV_EXCL_START -#if LLVM_VERSION_MAJOR >= 17 - // NOTE: the code model setup is working only on LLVM>=19 (or at least // LLVM 18 + patches, as in the conda-forge LLVM package), due to this bug: // // https://github.com/llvm/llvm-project/issues/88115 // // Additionally, there are indications from our CI that attempting to set - // the code model before LLVM 17 might just be buggy, as we see widespread + // the code model before LLVM 17 or on Windows might just be buggy, as we see widespread // ASAN failures all over the place. Thus, let us not do anything with the code - // model setting before LLVM 17. + // model setting before LLVM 17 or on Windows. + +#if LLVM_VERSION_MAJOR >= 17 && !defined(_WIN32) // Setup the code model. switch (c_model) { From b615886de072c8e451424058335a5bbc13e1ee8d Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 3 Sep 2024 10:21:09 +0200 Subject: [PATCH 07/10] Avoid using bmp in a test. 
--- test/llvm_helpers.cpp | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/test/llvm_helpers.cpp b/test/llvm_helpers.cpp index ec13bd327..b96833ac9 100644 --- a/test/llvm_helpers.cpp +++ b/test/llvm_helpers.cpp @@ -20,7 +20,6 @@ #include #include -#include #include @@ -1671,16 +1670,18 @@ TEST_CASE("eft_product scalar") REQUIRE(x == a * b); +#if defined(HEYOKA_HAVE_REAL) #if defined(HEYOKA_HAVE_REAL128) if constexpr (!std::is_same_v) { #endif - namespace bmp = boost::multiprecision; - using mp_fp_t - = bmp::number::digits * 2, bmp::digit_base_2>>; - REQUIRE(mp_fp_t(x) + mp_fp_t(y) == mp_fp_t(a) * mp_fp_t(b)); + using mp_fp_t = mppp::real; + const auto prec = std::numeric_limits::digits * 2; + + REQUIRE(mp_fp_t(x, prec) + mp_fp_t(y, prec) == mp_fp_t(a, prec) * mp_fp_t(b, prec)); #if defined(HEYOKA_HAVE_REAL128) } +#endif #endif } } @@ -1759,16 +1760,17 @@ TEST_CASE("eft_product batch") REQUIRE(xv == a * b); +#if defined(HEYOKA_HAVE_REAL) #if defined(HEYOKA_HAVE_REAL128) if constexpr (!std::is_same_v) { #endif - namespace bmp = boost::multiprecision; - using mp_fp_t = bmp::number< - bmp::cpp_bin_float::digits * 2, bmp::digit_base_2>>; + using mp_fp_t = mppp::real; + const auto prec = std::numeric_limits::digits * 2; - REQUIRE(mp_fp_t(xv) + mp_fp_t(yv) == mp_fp_t(a) * mp_fp_t(b)); + REQUIRE(mp_fp_t(xv, prec) + mp_fp_t(yv, prec) == mp_fp_t(a, prec) * mp_fp_t(b, prec)); #if defined(HEYOKA_HAVE_REAL128) } +#endif #endif } } @@ -2526,12 +2528,12 @@ TEST_CASE("dl modulus scalar") auto f_ptr = reinterpret_cast(s.jit_lookup("hey_dl_modulus")); +#if defined(HEYOKA_HAVE_REAL) #if defined(HEYOKA_HAVE_REAL128) if constexpr (!std::is_same_v) { #endif - namespace bmp = boost::multiprecision; - using mp_fp_t - = bmp::number::digits * 2, bmp::digit_base_2>>; + using mp_fp_t = mppp::real; + const auto prec = std::numeric_limits::digits * 2; std::uniform_real_distribution op_dist(fp_t(-1e6), fp_t(1e6)), quo_dist(fp_t(.1), 
fp_t(10.)); @@ -2542,13 +2544,14 @@ TEST_CASE("dl modulus scalar") f_ptr(&res_hi, &res_lo, x, 0, y, 0); - auto res_mp = mp_fp_t(x) - mp_fp_t(y) * floor(mp_fp_t(x) / mp_fp_t(y)); + auto res_mp = mp_fp_t(x, prec) - mp_fp_t(y, prec) * floor(mp_fp_t(x, prec) / mp_fp_t(y, prec)); REQUIRE(res_hi == approximately(static_cast(res_mp), fp_t(10))); } #if defined(HEYOKA_HAVE_REAL128) } +#endif #endif } }; @@ -2608,12 +2611,12 @@ TEST_CASE("dl modulus batch") auto f_ptr = reinterpret_cast( s.jit_lookup("hey_dl_modulus")); +#if defined(HEYOKA_HAVE_REAL) #if defined(HEYOKA_HAVE_REAL128) if constexpr (!std::is_same_v) { #endif - namespace bmp = boost::multiprecision; - using mp_fp_t - = bmp::number::digits * 2, bmp::digit_base_2>>; + using mp_fp_t = mppp::real; + const auto prec = std::numeric_limits::digits * 2; std::uniform_real_distribution op_dist(fp_t(-1e6), fp_t(1e6)), quo_dist(fp_t(.1), fp_t(10.)); @@ -2634,8 +2637,9 @@ TEST_CASE("dl modulus batch") b_lo_vec.data()); for (auto i = 0u; i < batch_size; ++i) { - auto res_mp = mp_fp_t(a_hi_vec[i]) - - mp_fp_t(b_hi_vec[i]) * floor(mp_fp_t(a_hi_vec[i]) / mp_fp_t(b_hi_vec[i])); + auto res_mp = mp_fp_t(a_hi_vec[i], prec) + - mp_fp_t(b_hi_vec[i], prec) + * floor(mp_fp_t(a_hi_vec[i], prec) / mp_fp_t(b_hi_vec[i], prec)); REQUIRE(x_vec[i] == approximately(static_cast(res_mp), fp_t(10))); } @@ -2643,6 +2647,7 @@ TEST_CASE("dl modulus batch") #if defined(HEYOKA_HAVE_REAL128) } +#endif #endif } } From 5250e91625c1883bd851a11531352b07a7666a27 Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 3 Sep 2024 10:21:21 +0200 Subject: [PATCH 08/10] [skip ci] From 30e8f0c69941de7c7161e2ccf9ce0211fc222a5b Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 3 Sep 2024 20:33:15 +0200 Subject: [PATCH 09/10] Set parallel compilation to disabled by default, and just forbid it in Windows. 
--- include/heyoka/llvm_state.hpp | 23 ++++++++++++----------- src/llvm_state.cpp | 6 ++++++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index 4ee929cbf..a6fb77d52 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -348,17 +348,18 @@ std::optional llvm_state_mem_cache_lookup(const std::vector, unsigned, llvm_mc_value); // The default setting for the parjit flag for llvm_multi_state. -// There is evidence of an LLVM thread scheduling bug when parallel compilation -// is active, that rarely results in multiply-defined symbols for external C -// functions, which leads to compilation failure. So far, we have been able to -// trigger this issue only on 64-bit arm. -inline constexpr bool default_parjit = -#if defined(HEYOKA_ARCH_ARM) - false -#else - true -#endif - ; +// +// At this time, it seems like parallel compilation in lljit is buggy: +// +// - on Unix platforms, parallel compilation occasionally results in +// multiply-defined symbols for external C functions, which leads to +// compilation failures; +// - on Windows, it seems like parallel compilation outright results in +// segmentation faults under heavy load. +// +// The root of the problem seems to be a concurrency issue. Thus, for the time +// being, let us just disable parallel compilation by default. +inline constexpr bool default_parjit = false; } // namespace detail diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index 52f71cc56..af5fc6d6d 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -1667,11 +1667,17 @@ multi_jit::multi_jit(unsigned n_modules, unsigned opt_level, code_model c_model, #else + // NOTE: never enable parallel compilation on Windows due to + // segfaults under heavy load. +#if !defined(_WIN32) + if (m_parjit) { // Set the number of compilation threads. lljit_builder.setNumCompileThreads(std::thread::hardware_concurrency()); } +#endif + #endif // Create the jit. 
From 5d645c6de0756c10de0cc2daadd8fed7a2f4291e Mon Sep 17 00:00:00 2001 From: Francesco Biscani Date: Tue, 3 Sep 2024 22:55:10 +0200 Subject: [PATCH 10/10] Update the known issues page. --- doc/known_issues.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/known_issues.rst b/doc/known_issues.rst index 495739430..7972f0689 100644 --- a/doc/known_issues.rst +++ b/doc/known_issues.rst @@ -18,14 +18,18 @@ Unsolved The root cause is most likely a code-generation/optimisation problem in LLVM. This issue is currently under investigation. -* The parallel compilation feature (added in heyoka 6.0.0) is currently disabled - by default on 64-bit ARM processors (this includes the Apple M1 and its successors). +* The parallel compilation feature (added in heyoka 6.0.0) is currently turned + off by default on all platforms and completely disabled on Windows. The reason is a likely thread scheduling bug in LLVM's parallel compilation facilities - that very rarely results in a multiply-defined symbol, which ultimately leads to compilation - failure. The issue is currently under investigation by the LLVM developers. In the - meantime, you can explicitly turn on parallel compilation via the ``kw::parjit`` + which, on Unix systems, rarely results in a multiply-defined symbol, ultimately leading to a compilation + failure. On Windows, parallel compilation under heavy loads results in segmentation faults. + The issue is currently under investigation by the LLVM developers. In the + meantime, you can explicitly turn on parallel compilation on Unix systems via the ``kw::parjit`` :ref:`keyword argument ` when constructing an integrator or a compiled function. +* The option for selecting the code model used for JIT compilation + (added in heyoka 6.0.0) is currently disabled on Windows due to what + looks like an LLVM bug. The issue is currently under investigation. Solved ======