From bb05e0810b87e74709d9f4c4545f1f57a1b386f5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 2 Jul 2024 12:58:09 -0400 Subject: [PATCH] Use PyMutex instead of std::mutex in free-threaded build. (#5219) * Use PyMutex instead of std::mutex in free-threaded build. PyMutex is now part of the public C API as of 3.13.0b3 and generally has slightly less overhead than std::mutex. * style: pre-commit fixes * Fix instance_map_shard padding --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- include/pybind11/detail/internals.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index e61c1687fc..3f0a6a9492 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -148,20 +148,35 @@ struct override_hash { using instance_map = std::unordered_multimap; +#ifdef Py_GIL_DISABLED +// Wrapper around PyMutex to provide BasicLockable semantics +class pymutex { + PyMutex mutex; + +public: + pymutex() : mutex({}) {} + void lock() { PyMutex_Lock(&mutex); } + void unlock() { PyMutex_Unlock(&mutex); } +}; + // Instance map shards are used to reduce mutex contention in free-threaded Python. struct instance_map_shard { - std::mutex mutex; instance_map registered_instances; + pymutex mutex; // alignas(64) would be better, but causes compile errors in macOS before 10.14 (see #5200) - char padding[64 - (sizeof(std::mutex) + sizeof(instance_map)) % 64]; + char padding[64 - (sizeof(instance_map) + sizeof(pymutex)) % 64]; }; +static_assert(sizeof(instance_map_shard) % 64 == 0, + "instance_map_shard size is not a multiple of 64 bytes"); +#endif + /// Internal data structure used to track registered instances and types. /// Whenever binary incompatible changes are made to this structure, /// `PYBIND11_INTERNALS_VERSION` must be incremented. struct internals { #ifdef Py_GIL_DISABLED - std::mutex mutex; + pymutex mutex; #endif // std::type_index -> pybind11's type information type_map registered_types_cpp; @@ -614,7 +629,7 @@ inline local_internals &get_local_internals() { } #ifdef Py_GIL_DISABLED -# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) +# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) #else # define PYBIND11_LOCK_INTERNALS(internals) #endif @@ -651,7 +666,7 @@ inline auto with_instance_map(const void *ptr, auto idx = static_cast(hash & internals.instance_shards_mask); auto &shard = internals.instance_shards[idx]; - std::unique_lock lock(shard.mutex); + std::unique_lock lock(shard.mutex); return cb(shard.registered_instances); #else (void) ptr; @@ -667,7 +682,7 @@ inline size_t num_registered_instances() { size_t count = 0; for (size_t i = 0; i <= internals.instance_shards_mask; ++i) { auto &shard = internals.instance_shards[i]; - std::unique_lock lock(shard.mutex); + std::unique_lock lock(shard.mutex); count += shard.registered_instances.size(); } return count;