diff --git a/ddtrace/profiling/collector/_memalloc.c b/ddtrace/profiling/collector/_memalloc.c index 5e61988cfc5..f9b8c31a71e 100644 --- a/ddtrace/profiling/collector/_memalloc.c +++ b/ddtrace/profiling/collector/_memalloc.c @@ -5,7 +5,6 @@ #define PY_SSIZE_T_CLEAN #include -#include "_memalloc_debug.h" #include "_memalloc_heap.h" #include "_memalloc_reentrant.h" #include "_memalloc_tb.h" @@ -43,19 +42,18 @@ static PyObject* object_string = NULL; #define ALLOC_TRACKER_MAX_COUNT UINT64_MAX -// The data coordination primitives in this and related files are related to a crash we started seeing. -// We don't have a precise understanding of the causal factors within the runtime that lead to this condition, -// since the GIL alone was sufficient in the past for preventing this issue. -// We add an option here to _add_ a crash, in order to observe this condition in a future diagnostic iteration. -// **This option is _intended_ to crash the Python process** do not use without a good reason! -static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_MUTEX_PASS"; -// The allocation profiler functions should (in theory) only be called when allocating Python -// objects, which should (in theory) only be done with the GIL held. We have reason to believe -// that this code is sometimes reached without the GIL held, since some crashes in the code -// seem to go away with our own locking. This debug flag will make the profiler crash if -// it detects the GIL is not held in places where we think it ought to be. -static char g_crash_on_no_gil_str[] = "_DD_PROFILING_MEMALLOC_CRASH_ON_NO_GIL"; -static bool g_crash_on_no_gil = false; +/* This lock protects access to global_alloc_tracker. The GIL is NOT sufficient + to protect our data structures from concurrent access. For one, the GIL is an + implementation detail and may go away in the future. Additionally, even if the + GIL is held on _entry_ to our C extension functions, making it safe to call + Python C API functions, the GIL can be released during Python C API calls if + we call back into interpreter code. This can happen if we allocate a Python + object (such as frame info), trigger garbage collection, and run arbitrary + destructors. When this happens, other threads can run python code, such as the + thread that aggregates and uploads the profile data and mutates the global + data structures. The GIL does not create critical sections for C extension + functions! + */ static memlock_t g_memalloc_lock; static alloc_tracker_t* global_alloc_tracker; @@ -98,31 +96,15 @@ __attribute__((constructor)) static void memalloc_init() { - // Check if we should crash the process on mutex pass - bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str); - memlock_init(&g_memalloc_lock, crash_on_mutex_pass); + memlock_init(&g_memalloc_lock); #ifndef _WIN32 pthread_atfork(memalloc_prefork, memalloc_postfork_parent, memalloc_postfork_child); #endif - - g_crash_on_no_gil = memalloc_get_bool_env(g_crash_on_no_gil_str); -} - -static void -memalloc_assert_gil() -{ - if (g_crash_on_no_gil && !PyGILState_Check()) { - int* p = NULL; - *p = 0; - abort(); // should never reach here - } } static void memalloc_add_event(memalloc_context_t* ctx, void* ptr, size_t size) { - memalloc_assert_gil(); - uint64_t alloc_count = atomic_add_clamped(&global_alloc_tracker->alloc_count, 1, ALLOC_TRACKER_MAX_COUNT); /* Return if we've reached the maximum number of allocations */ @@ -345,8 +327,6 @@ memalloc_stop(PyObject* Py_UNUSED(module), PyObject* Py_UNUSED(args)) return NULL; } - memalloc_assert_gil(); - PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &global_memalloc_ctx.pymem_allocator_obj); memalloc_tb_deinit(); if (memlock_trylock(&g_memalloc_lock)) { @@ -406,8 +386,6 @@ iterevents_new(PyTypeObject* type, PyObject* Py_UNUSED(args), PyObject* Py_UNUSE return NULL; } - memalloc_assert_gil(); - /* Reset the current traceback list. Do this outside lock so we can track it, * and avoid reentrancy/deadlock problems, if we start tracking the raw * allocator domain */ diff --git a/ddtrace/profiling/collector/_memalloc_debug.h b/ddtrace/profiling/collector/_memalloc_debug.h deleted file mode 100644 index 0e2ad4653c3..00000000000 --- a/ddtrace/profiling/collector/_memalloc_debug.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _DDTRACE_MEMALLOC_DEBUG_H -#define _DDTRACE_MEMALLOC_DEBUG_H - -#include -#include -#include - -static const char* g_truthy_values[] = { "1", "true", "yes", "on", "enable", "enabled", NULL }; // NB the sentinel NULL - -static bool -memalloc_get_bool_env(char* key) -{ - char* val = getenv(key); - if (!val) { - return false; - } - for (int i = 0; g_truthy_values[i]; i++) { - if (strcmp(val, g_truthy_values[i]) == 0) { - return true; - } - } - return false; -} - -#endif diff --git a/ddtrace/profiling/collector/_memalloc_heap.c b/ddtrace/profiling/collector/_memalloc_heap.c index 1a3c3fb8cad..f0363c862d8 100644 --- a/ddtrace/profiling/collector/_memalloc_heap.c +++ b/ddtrace/profiling/collector/_memalloc_heap.c @@ -2,7 +2,6 @@ #include #define PY_SSIZE_T_CLEAN -#include "_memalloc_debug.h" #include "_memalloc_heap.h" #include "_memalloc_reentrant.h" #include "_memalloc_tb.h" @@ -80,7 +79,9 @@ typedef struct } freezer; } heap_tracker_t; -static char g_crash_on_mutex_pass_str[] = "_DD_PROFILING_MEMHEAP_CRASH_ON_MUTEX_PASS"; +/* This lock protects global_heap_tracker. See g_memalloc_lock docs for why this + * is needed, and why the GIL is not sufficient to protect our data structures + */ static memlock_t g_memheap_lock; static heap_tracker_t global_heap_tracker; @@ -120,9 +121,7 @@ __attribute__((constructor)) static void memheap_init() { - // Check if we should crash the process on mutex pass - bool crash_on_mutex_pass = memalloc_get_bool_env(g_crash_on_mutex_pass_str); - memlock_init(&g_memheap_lock, crash_on_mutex_pass); + memlock_init(&g_memheap_lock); #ifndef _WIN32 pthread_atfork(memheap_prefork, memheap_postfork_parent, memheap_postfork_child); #endif diff --git a/ddtrace/profiling/collector/_memalloc_reentrant.h b/ddtrace/profiling/collector/_memalloc_reentrant.h index 7e9c7989ad4..990134b5c0f 100644 --- a/ddtrace/profiling/collector/_memalloc_reentrant.h +++ b/ddtrace/profiling/collector/_memalloc_reentrant.h @@ -86,18 +86,13 @@ typedef struct #endif } memlock_t; -// Global setting; if a lock fails to be acquired, crash -static bool g_crash_on_mutex_pass = false; - // Generic initializer static inline bool -memlock_init(memlock_t* lock, bool crash_on_pass) +memlock_init(memlock_t* lock) { if (!lock) return false; - g_crash_on_mutex_pass = crash_on_pass; - #ifdef _WIN32 lock->mutex = CreateMutex(NULL, FALSE, NULL); return lock->mutex != NULL; @@ -137,13 +132,6 @@ memlock_trylock(memlock_t* lock) #else bool result = 0 == pthread_mutex_trylock(&lock->mutex); #endif - if (!result && g_crash_on_mutex_pass) { - // segfault - int* p = NULL; - *p = 0; - abort(); // should never reach here - } - return result; }