From 7ff0057d219ab3e5cb9ca8a4283b5199d73bb0aa Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Wed, 6 Mar 2024 12:49:49 +0000 Subject: [PATCH] fix(profiling): serialisable stack key (#8435) Ensure that the stack key used to group events is actually hashable. ## Testing strategy This is an attempt to mitigate #8218, for which we don't have a reproducer. Should the issue occur again, we should be able to act on any future reports with the extra context that this proposed change provides. ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/profiling/exporter/pprof.pyx | 9 ++++++++- .../fix-profiling-stack-event-data-dc50c8e7663718cc.yaml | 5 +++++ 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/fix-profiling-stack-event-data-dc50c8e7663718cc.yaml diff --git a/ddtrace/profiling/exporter/pprof.pyx b/ddtrace/profiling/exporter/pprof.pyx index 79f40685fef..54cc85193b7 100644 --- a/ddtrace/profiling/exporter/pprof.pyx +++ b/ddtrace/profiling/exporter/pprof.pyx @@ -13,6 +13,7 @@ from ddtrace.internal import packages from ddtrace.internal._encoding import ListStringTable as _StringTable from ddtrace.internal.compat import ensure_text from ddtrace.internal.datadog.profiling.ddup.utils import sanitize_string +from ddtrace.internal.logger import get_logger from ddtrace.internal.utils import config from ddtrace.profiling import event from ddtrace.profiling import exporter @@ -23,6 +24,9 @@ from ddtrace.profiling.collector import stack_event from ddtrace.profiling.collector import threading +log = get_logger(__name__) + + if hasattr(typing, "TypedDict"): Package = typing.TypedDict( "Package", @@ -129,7 +133,10 @@ cdef groupby(object collection, object key): cdef dict groups = {} for item in collection: - groups.setdefault(key(item), []).append(item) + try: + groups.setdefault(key(item), []).append(item) + except Exception: + log.warning("Failed to group item %r", item, exc_info=True) return groups.items() diff --git a/releasenotes/notes/fix-profiling-stack-event-data-dc50c8e7663718cc.yaml b/releasenotes/notes/fix-profiling-stack-event-data-dc50c8e7663718cc.yaml new file mode 100644 index 00000000000..14acc2b8970 --- /dev/null +++ b/releasenotes/notes/fix-profiling-stack-event-data-dc50c8e7663718cc.yaml @@ -0,0 +1,5 @@ +--- +fixes: + - | + profiling: handle unexpected stack data to prevent the profiler from + stopping.