From 9ef9c1a26ad87827757059946e0fb6bb94ba2e68 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Thu, 6 Mar 2025 14:50:56 -0500 Subject: [PATCH] refactor(profiling): remove memalloc lock debug asserts (#12657) We added some debug assertions around the locks we added to memalloc and the GIL. When we added the locks, we hadn't fully root-caused the problem and weren't 100% sure why we needed the locks in the first place. The assertions were there to find where the locks were contended, and to see if the GIL wasn't held. It turns out the GIL _was_ held, at least in a test program which crashed prior to adding locking. But we fundamentally misunderstood the guarantees the GIL made (or lack thereof). See https://cython.readthedocs.io/en/latest/src/userguide/nogil.html#don-t-use-the-gil-as-a-lock. The GIL is not intended to be used for anything other than protecting access to the interpreter. The GIL doesn't make critical sections for C extensions. The GIL isn't our lock, in other words. Since we understand better now why the locks are needed, we can remove the debug assertions. In their place, add some documentation for why the locks are needed. --- ddtrace/profiling/collector/_memalloc.c | 48 +++++-------------- ddtrace/profiling/collector/_memalloc_debug.h | 25 ---------- ddtrace/profiling/collector/_memalloc_heap.c | 9 ++-- .../profiling/collector/_memalloc_reentrant.h | 14 +----- 4 files changed, 18 insertions(+), 78 deletions(-) delete mode 100644 ddtrace/profiling/collector/_memalloc_debug.h diff --git a/ddtrace/profiling/collector/_memalloc.c b/ddtrace/profiling/collector/_memalloc.c index 5e61988cfc5..f9b8c31a71e 100644 --- a/ddtrace/profiling/collector/_memalloc.c +++ b/ddtrace/profiling/collector/_memalloc.c @@ -5,7 +5,6 @@ #define PY_SSIZE_T_CLEAN #include -#include "_memalloc_debug.h" #include "_memalloc_heap.h" #include "_memalloc_reentrant.h" #include "_memalloc_tb.h" @@ -43,19 +42,18 @@ static PyObject* object_string = NULL; #define ALLOC_TRACKER_MAX_COUNT UINT64_MAX -// The data coordination primitives in this and related files are related to a crash we started seeing. -// We don't have a precise understanding of the causal factors within the runtime that lead to this condition, -// since the GIL alone was sufficient in the past for preventing this issue. -// We add an option here to _add_ a crash, in order to observe this condition in a future diagnostic iteration. -// **This option is _intended_ to crash the Python process** do not use without a good reason! -static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_MUTEX_PASS"; -// The allocation profiler functions should (in theory) only be called when allocating Python -// objects, which should (in theory) only be done with the GIL held. We have reason to believe -// that this code is sometimes reached without the GIL held, since some crashes in the code -// seem to go away with our own locking. This debug flag will make the profiler crash if -// it detects the GIL is not held in places where we think it ought to be. -static char g_crash_on_no_gil_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_NO_GIL"; -static bool g_crash_on_no_gil = false; +/* This lock protects access to global_alloc_tracker. The GIL is NOT sufficient + to protect our data structures from concurrent access. For one, the GIL is an + implementation detail and may go away in the future. Additionally, even if the + GIL is held on _entry_ to our C extension functions, making it safe to call + Python C API functions, the GIL can be released during Python C API calls if + we call back into interpreter code. This can happen if we allocate a Python + object (such as frame info), trigger garbage collection, and run arbitrary + destructors. When this happens, other threads can run python code, such as the + thread that aggregates and uploads the profile data and mutates the global + data structures. The GIL does not create critical sections for C extension + functions! + */ static memlock_t g_memalloc_lock; static alloc_tracker_t* global_alloc_tracker; @@ -98,31 +96,15 @@ __attribute__((constructor)) static void memalloc_init() { - // Check if we should crash the process on mutex pass - bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str); - memlock_init(&g_memalloc_lock, crash_on_mutex_pass); + memlock_init(&g_memalloc_lock); #ifndef _WIN32 pthread_atfork(memalloc_prefork, memalloc_postfork_parent, memalloc_postfork_child); #endif - - g_crash_on_no_gil = memalloc_get_bool_env(g_crash_on_no_gil_str); -} - -static void -memalloc_assert_gil() -{ - if (g_crash_on_no_gil && !PyGILState_Check()) { - int* p = NULL; - *p = 0; - abort(); // should never reach here - } } static void memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size) { - memalloc_assert_gil(); - uint64_t alloc_count = atomic_add_clamped(&global_alloc_tracker->alloc_count, 1, ALLOC_TRACKER_MAX_COUNT); /* Return if we've reached the maximum number of allocations */ @@ -345,8 +327,6 @@ memalloc_stop(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args)) return NULL; } - memalloc_assert_gil(); - PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &global_memalloc_ctx.pymem_allocator_obj); memalloc_tb_deinit(); if (memlock_trylock(&g_memalloc_lock)) { @@ -406,8 +386,6 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE return NULL; } - memalloc_assert_gil(); - /* Reset the current traceback list. Do this outside lock so we can track it, * and avoid reentrancy/deadlock problems, if we start tracking the raw * allocator domain */ diff --git a/ddtrace/profiling/collector/_memalloc_debug.h b/ddtrace/profiling/collector/_memalloc_debug.h deleted file mode 100644 index 0e2ad4653c3..00000000000 --- a/ddtrace/profiling/collector/_memalloc_debug.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _DDTRACE_MEMALLOC_DEBUG_H -#define _DDTRACE_MEMALLOC_DEBUG_H - -#include -#include -#include - -static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL - -static bool -memalloc_get_bool_env(char* key) -{ - char* val = getenv(key); - if (!val) { - return false; - } - for (int i = 0; g_truthy_values[i]; i++) { - if (strcmp(val, g_truthy_values[i]) == 0) { - return true; - } - } - return false; -} - -#endif diff --git a/ddtrace/profiling/collector/_memalloc_heap.c b/ddtrace/profiling/collector/_memalloc_heap.c index 1a3c3fb8cad..f0363c862d8 100644 --- a/ddtrace/profiling/collector/_memalloc_heap.c +++ b/ddtrace/profiling/collector/_memalloc_heap.c @@ -2,7 +2,6 @@ #include #define PY_SSIZE_T_CLEAN -#include "_memalloc_debug.h" #include "_memalloc_heap.h" #include "_memalloc_reentrant.h" #include "_memalloc_tb.h" @@ -80,7 +79,9 @@ typedef struct } freezer; } heap_tracker_t; -static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMHEAP_CRASH_ON_MUTEX_PASS"; +/* This lock protects global_heap_tracker. See g_memalloc_lock docs for why this + * is needed, and why the GIL is not sufficient to protect our data structures + */ static memlock_t g_memheap_lock; static heap_tracker_t global_heap_tracker; @@ -120,9 +121,7 @@ __attribute__((constructor)) static void memheap_init() { - // Check if we should crash the process on mutex pass - bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str); - memlock_init(&g_memheap_lock, crash_on_mutex_pass); + memlock_init(&g_memheap_lock); #ifndef _WIN32 pthread_atfork(memheap_prefork, memheap_postfork_parent, memheap_postfork_child); #endif diff --git a/ddtrace/profiling/collector/_memalloc_reentrant.h b/ddtrace/profiling/collector/_memalloc_reentrant.h index 7e9c7989ad4..990134b5c0f 100644 --- a/ddtrace/profiling/collector/_memalloc_reentrant.h +++ b/ddtrace/profiling/collector/_memalloc_reentrant.h @@ -86,18 +86,13 @@ typedef struct #endif } memlock_t; -// Global setting; if a lock fails to be acquired, crash -static bool g_crash_on_mutex_pass = false; - // Generic initializer static inline bool -memlock_init(memlock_t* lock, bool crash_on_pass) +memlock_init(memlock_t* lock) { if (!lock) return false; - g_crash_on_mutex_pass = crash_on_pass; - #ifdef _WIN32 lock->mutex = CreateMutex(NULL, FALSE, NULL); return lock->mutex != NULL; @@ -137,13 +132,6 @@ memlock_trylock(memlock_t* lock) #else bool result = 0 == pthread_mutex_trylock(&lock->mutex); #endif - if (!result && g_crash_on_mutex_pass) { - // segfault - int* p = NULL; - *p = 0; - abort(); // should never reach here - } - return result; }