diff --git a/.gitignore b/.gitignore index 9e66a75cb8a..f93c439edba 100644 --- a/.gitignore +++ b/.gitignore @@ -13,8 +13,7 @@ ddtrace/profiling/collector/_traceback.c ddtrace/profiling/collector/stack.c ddtrace/profiling/exporter/pprof.c ddtrace/profiling/_build.c -ddtrace/internal/datadog/profiling/ddup.cpp -ddtrace/internal/datadog/profiling/_ddup.cpp +ddtrace/internal/datadog/profiling/ddup/_ddup.cpp ddtrace/internal/_encoding.c ddtrace/internal/_rand.c ddtrace/internal/_tagset.c diff --git a/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake new file mode 100644 index 00000000000..da2584a08d1 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/cmake/AnalysisFunc.cmake @@ -0,0 +1,31 @@ +include(CheckIPOSupported) + +function(add_ddup_config target) + target_compile_options(${target} PRIVATE + "$<$:-Og;-ggdb3>" + "$<$:-Os>" + -ffunction-sections -fno-semantic-interposition -Wall -Werror -Wextra -Wshadow -Wnon-virtual-dtor -Wold-style-cast + ) + target_link_options(${target} PRIVATE + "$<$:-s>" + -Wl,--as-needed -Wl,-Bsymbolic-functions -Wl,--gc-sections + ) + set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + check_ipo_supported(RESULT result) + if (result) + set_property(TARGET ${target} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() + + # Propagate sanitizers + if (SANITIZE_OPTIONS) + # Some sanitizers (or the analysis--such as symbolization--tooling thereof) work better with frame + # pointers, so we include it here. 
+ target_compile_options(${target} PRIVATE -fsanitize=${SANITIZE_OPTIONS} -fno-omit-frame-pointer) + target_link_options(${target} PRIVATE -fsanitize=${SANITIZE_OPTIONS}) + endif() + + # If DO_FANALYZER is specified and we're using gcc, then we can use -fanalyzer + if (DO_FANALYZER AND CMAKE_CXX_COMPILER_ID MATCHES "GNU") + target_compile_options(${target} PRIVATE -fanalyzer) + endif() +endfunction() diff --git a/ddtrace/internal/datadog/profiling/cmake/FindCppcheck.cmake b/ddtrace/internal/datadog/profiling/cmake/FindCppcheck.cmake index 458d0c824e2..b215e1dfad2 100644 --- a/ddtrace/internal/datadog/profiling/cmake/FindCppcheck.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/FindCppcheck.cmake @@ -1,3 +1,8 @@ +# Only add this project if it doesn't already exist +if (TARGET cppcheck_project) + return() +endif() + include(ExternalProject) # Build cppcheck from sources @@ -17,10 +22,20 @@ endif() # This function will add a cppcheck target for a given directory # unless DO_CPPCHECK is set to false, then it will add a target that does nothing -function(add_cppcheck_target NAME DIRECTORY) +function(add_cppcheck_target) + # Parse additional arguments as lists + set(options) + set(oneValueArgs TARGET) + set(multiValueArgs INCLUDE SRC) + cmake_parse_arguments(PARSE_ARGV 0 ARG "${options}" "${oneValueArgs}" "${multiValueArgs}") + + # Automatically generate the cppcheck target name + set(NAME "cppcheck_dd_${ARGV0}") + if (DO_CPPCHECK) - add_custom_target("${NAME}" - COMMAND ${CPPCHECK_EXECUTABLE} + # Initialize command variable + set(cppcheck_cmd + ${CPPCHECK_EXECUTABLE} --enable=all --addon=threadsafety.py --addon=misc @@ -31,18 +46,31 @@ function(add_cppcheck_target NAME DIRECTORY) --suppress=missingIncludeSystem --inline-suppr --error-exitcode=1 - -I ${CMAKE_SOURCE_DIR}/include - -I ${Datadog_INCLUDE_DIRS} - ${CMAKE_SOURCE_DIR}/src - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - COMMENT "Running cppcheck" ) - add_dependencies(cppcheck_runner "${NAME}") + + # Append include 
directories to the command + foreach(INCLUDE_DIR ${ARG_INCLUDE}) + list(APPEND cppcheck_cmd -I ${INCLUDE_DIR}) + endforeach() + + # Append source directories/files to the command + foreach(SRC_FILE ${ARG_SRC}) + list(APPEND cppcheck_cmd ${SRC_FILE}) + endforeach() + + # Define the custom target with the constructed command + add_custom_target(${NAME} + COMMAND ${cppcheck_cmd} + COMMENT "Running cppcheck on ${ARGV0}" + ) + + # Make the cppcheck target a dependent of the specified target + add_dependencies(${ARGV0} ${NAME}) else() - # Need to make this function work, but kinda do nothing - add_custom_target("${NAME}" - COMMAND echo "building ${NAME} without cppcheck" - COMMENT "cppcheck is disabled" + # Define a do-nothing target if cppcheck is disabled + add_custom_target(${NAME} + COMMAND echo "Cppcheck target ${NAME} is disabled." + COMMENT "cppcheck is disabled for ${ARGV0}" ) endif() endfunction() diff --git a/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake b/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake index cb327c187cc..b632f1a84fd 100644 --- a/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake +++ b/ddtrace/internal/datadog/profiling/cmake/FindLibdatadog.cmake @@ -1,7 +1,14 @@ +# Only add this project if Datadog::Profiling is not already defined +if(TARGET Datadog::Profiling) + return() +endif() + +include(ExternalProject) set(TAG_LIBDATADOG "v5.0.0" CACHE STRING "libdatadog github tag") +set(Datadog_BUILD_DIR ${CMAKE_BINARY_DIR}/libdatadog) set(Datadog_ROOT ${Datadog_BUILD_DIR}/libdatadog-${TAG_LIBDATADOG}) message(STATUS "${CMAKE_CURRENT_LIST_DIR}/tools/fetch_libdatadog.sh ${TAG_LIBDATADOG} ${Datadog_ROOT}") @@ -15,7 +22,7 @@ set(Datadog_DIR "${Datadog_ROOT}/cmake") # Prefer static library to shared library set(CMAKE_FIND_LIBRARY_SUFFIXES_BACKUP ${CMAKE_FIND_LIBRARY_SUFFIXES}) -set(CMAKE_FIND_LIBRARY_SUFFIXES .a .so) +set(CMAKE_FIND_LIBRARY_SUFFIXES .a) find_package(Datadog REQUIRED) diff --git 
a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt index 90ad5cccb39..9013774b7a5 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/CMakeLists.txt @@ -4,37 +4,19 @@ project(dd_wrapper LANGUAGES CXX ) -# Infer some basic properties about the build. This is necessary because multiple -# extensions reuse this cmake file, so we need its artifacts to go in a consistent -# place -get_filename_component(dd_wrapper_BUILD_PARENT "${CMAKE_BINARY_DIR}/../.." ABSOLUTE) -set(dd_wrapper_BUILD_DIR "${dd_wrapper_BUILD_PARENT}/ddtrace.internal.datadog.profiling") +# Build in a predictable location. This is needed for setup.py +get_filename_component(dd_wrapper_BUILD_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../ddtrace.internal.datadog.profiling" ABSOLUTE) # Custom modules are in the parent directory list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") -# Setup state for FindLibdatadog -set(Datadog_BUILD_DIR "${dd_wrapper_BUILD_PARENT}/libdatadog") - # Includes -include(CMakePackageConfigHelpers) # For generating the config file include(FetchContent) include(ExternalProject) include(FindLibdatadog) +include(AnalysisFunc) include(FindCppcheck) -# Send some of these vars back up to the parent -if (DD_WRAPPER_IS_SUBPROJECT) - set(Datadog_INCLUDE_DIRS "${Datadog_INCLUDE_DIRS}" PARENT_SCOPE) - set(dd_wrapper_BUILD_DIR "${dd_wrapper_BUILD_PARENT}/ddtrace.internal.datadog.profiling" PARENT_SCOPE) -endif() - -add_compile_options( - "$<$:-Og;-ggdb3>" - "$<$:-Os -s>" - -fno-semantic-interposition -Wall -Werror -Wextra -Wshadow -Wnon-virtual-dtor -Wold-style-cast -) - # Library sources add_library(dd_wrapper SHARED src/uploader_builder.cpp @@ -45,19 +27,17 @@ add_library(dd_wrapper SHARED src/interface.cpp ) +# Add common configuration flags +add_ddup_config(dd_wrapper) + # At present, C++17 is chosen as the minimum standard. 
This conflicts with the # manylinux2014 standard upon which we must rely. We'll have to statically link # libstdc++ to avoid this conflict, but need to remain mindful of symbol visibility # and overall binary size. target_compile_features(dd_wrapper PUBLIC cxx_std_17) -# Set some linker options -target_compile_options(dd_wrapper PRIVATE -fno-semantic-interposition) -target_compile_options(dd_wrapper PRIVATE -ffunction-sections -fdata-sections) +# Statically link libstdc++ to avoid manylinux2014 conflicts target_link_options(dd_wrapper PRIVATE -static-libstdc++) -target_link_options(dd_wrapper PRIVATE -Wl,-Bsymbolic-functions) -target_link_options(dd_wrapper PRIVATE -Wl,--gc-sections) -set_property(TARGET dd_wrapper PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) target_include_directories(dd_wrapper PRIVATE include @@ -68,35 +48,22 @@ target_link_libraries(dd_wrapper PRIVATE ) set_target_properties(dd_wrapper PROPERTIES POSITION_INDEPENDENT_CODE ON) -set_target_properties(dd_wrapper - PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${dd_wrapper_BUILD_DIR}" -) - -# If DO_FANALYZER is specified and we're using gcc, then we can use -fanalyzer -if (DO_FANALYZER AND CMAKE_CXX_COMPILER_ID MATCHES "GNU") - target_compile_options(dd_wrapper PRIVATE -fanalyzer) -endif() - -# If DO_CPPCHECK is specified, then we can use cppcheck -add_cppcheck_target(cppcheck_dd_wrapper ${CMAKE_CURRENT_SOURCE_DIR}) - -# Propagate sanitizers -if (SANITIZE_OPTIONS) - # Some sanitizers (or the analysis--such as symbolization--tooling thereof) work better with frame - # pointers, so we include it here. - target_compile_options(dd_wrapper PRIVATE -fsanitize=${SANITIZE_OPTIONS} -fno-omit-frame-pointer) - target_link_options(dd_wrapper PRIVATE -fsanitize=${SANITIZE_OPTIONS}) +# If LIB_INSTALL_DIR is set, install the library. +# Install one directory up--both ddup and stackv2 are set to the same relative level. +if (LIB_INSTALL_DIR) + install(TARGETS dd_wrapper + LIBRARY DESTINATION ${LIB_INSTALL_DIR}/.. 
+ ARCHIVE DESTINATION ${LIB_INSTALL_DIR}/.. + RUNTIME DESTINATION ${LIB_INSTALL_DIR}/.. + ) endif() -# If dd_wrapper_INSTALL_DIR is specified by the parent, use it -if (NOT dd_wrapper_INSTALL_DIR) - # If not, then just keep it in the build directory - set(dd_wrapper_INSTALL_DIR "${CMAKE_BINARY_DIR}/lib") -endif() -message(STATUS "dd_wrapper_INSTALL_DIR: ${dd_wrapper_INSTALL_DIR}") -install(TARGETS dd_wrapper - DESTINATION ${dd_wrapper_INSTALL_DIR} +# Configure cppcheck +add_cppcheck_target(dd_wrapper + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include + ${Datadog_INCLUDE_DIRS} + SRC ${CMAKE_CURRENT_SOURCE_DIR}/src ) # Add the tests diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp index 10baa7e93ed..0018a4b5e15 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/interface.hpp @@ -2,6 +2,7 @@ #include #include +#include // Forward decl of the return pointer namespace Datadog { @@ -12,21 +13,21 @@ class Sample; extern "C" { #endif - void ddup_config_env(const char* dd_env); - void ddup_config_service(const char* service); - void ddup_config_version(const char* version); - void ddup_config_runtime(const char* runtime); - void ddup_config_runtime_version(const char* runtime_version); - void ddup_config_profiler_version(const char* profiler_version); - void ddup_config_url(const char* url); + void ddup_config_env(std::string_view dd_env); + void ddup_config_service(std::string_view service); + void ddup_config_version(std::string_view version); + void ddup_config_runtime(std::string_view runtime); + void ddup_config_runtime_version(std::string_view runtime_version); + void ddup_config_profiler_version(std::string_view profiler_version); + void ddup_config_url(std::string_view url); void ddup_config_max_nframes(int max_nframes); - void 
ddup_config_user_tag(const char* key, const char* val); + void ddup_config_user_tag(std::string_view key, std::string_view val); void ddup_config_sample_type(unsigned int type); bool ddup_is_initialized(); void ddup_init(); - void ddup_set_runtime_id(const char* id, size_t sz); + void ddup_set_runtime_id(std::string_view id); bool ddup_upload(); // Proxy functions to the underlying sample @@ -37,22 +38,22 @@ extern "C" void ddup_push_release(Datadog::Sample* sample, int64_t release_time, int64_t count); void ddup_push_alloc(Datadog::Sample* sample, uint64_t size, uint64_t count); void ddup_push_heap(Datadog::Sample* sample, uint64_t size); - void ddup_push_lock_name(Datadog::Sample* sample, const char* lock_name); + void ddup_push_lock_name(Datadog::Sample* sample, std::string_view lock_name); void ddup_push_threadinfo(Datadog::Sample* sample, int64_t thread_id, int64_t thread_native_id, - const char* thread_name); + std::string_view thread_name); void ddup_push_task_id(Datadog::Sample* sample, int64_t task_id); - void ddup_push_task_name(Datadog::Sample* sample, const char* task_name); + void ddup_push_task_name(Datadog::Sample* sample, std::string_view task_name); void ddup_push_span_id(Datadog::Sample* sample, int64_t span_id); void ddup_push_local_root_span_id(Datadog::Sample* sample, int64_t local_root_span_id); - void ddup_push_trace_type(Datadog::Sample* sample, const char* trace_type); - void ddup_push_trace_resource_container(Datadog::Sample* sample, const char* trace_resource_container); - void ddup_push_exceptioninfo(Datadog::Sample* sample, const char* exception_type, int64_t count); - void ddup_push_class_name(Datadog::Sample* sample, const char* class_name); + void ddup_push_trace_type(Datadog::Sample* sample, std::string_view trace_type); + void ddup_push_trace_resource_container(Datadog::Sample* sample, std::string_view trace_resource_container); + void ddup_push_exceptioninfo(Datadog::Sample* sample, std::string_view exception_type, int64_t count); 
+ void ddup_push_class_name(Datadog::Sample* sample, std::string_view class_name); void ddup_push_frame(Datadog::Sample* sample, - const char* _name, - const char* _filename, + std::string_view _name, + std::string_view _filename, uint64_t address, int64_t line); void ddup_flush_sample(Datadog::Sample* sample); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp index da65416c240..c70b831847b 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/include/sample.hpp @@ -39,9 +39,6 @@ class Sample std::vector values = {}; public: - // Initialization and stuff - void start_sample(); - // Helpers bool push_label(ExportLabelKey key, std::string_view val); bool push_label(ExportLabelKey key, int64_t val); diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/setup_custom.sh b/ddtrace/internal/datadog/profiling/dd_wrapper/setup_custom.sh deleted file mode 100755 index 255747651e3..00000000000 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/setup_custom.sh +++ /dev/null @@ -1,172 +0,0 @@ -#!/bin/bash - -# This script is used to build the library independently of the setup.py -# build system. This is mostly used to integrate different analysis tools -# which I haven't promoted into our normal builds yet. 
- - -### Compiler discovery -# Initialize variables to store the highest versions -highest_gcc="" -highest_gxx="" -highest_clang="" -highest_clangxx="" - -# Function to find the highest version of compilers -find_highest_compiler_version() { - local base_name=$1 - local highest_var_name=$2 - local highest_version=0 - - for version in {20..5}; do - if command -v "${base_name}-${version}" &> /dev/null; then - if [ $version -gt $highest_version ]; then - highest_version=$version - eval "$highest_var_name=${base_name}-${version}" - fi - fi - done - - # Check for the base version if no numbered version was found - if [ $highest_version -eq 0 ] && command -v "$base_name" &> /dev/null; then - eval "$highest_var_name=$base_name" - fi -} - -# Find highest versions for each compiler -find_highest_compiler_version gcc highest_gcc -find_highest_compiler_version g++ highest_gxx -find_highest_compiler_version clang highest_clang -find_highest_compiler_version clang++ highest_clangxx - -### Build setup -SANITIZE_OPTIONS="" # passed directly to cmake -SAFETY_OPTIONS="address,leak,undefined" -THREAD_OPTIONS="thread" -NUMERICAL_OPTIONS="integer,nullability,signed-integer-overflow,bounds,float-divide-by-zero" -DATAFLOW_OPTIONS="dataflow" -MEMORY_OPTIONS="memory" -ANALYZE_OPTIONS="-fanalyzer" - -# helper function for setting the compiler -# Final cmake args -cmake_args=( - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON - -DCMAKE_VERBOSE_MAKEFILE=ON - -DBUILD_TESTING=ON -) - -set_clang() { - export CC=$highest_clang - export CXX=$highest_clangxx - cmake_args+=( - -DCMAKE_C_COMPILER=$CC - -DCMAKE_CXX_COMPILER=$CXX - ) -} - -set_gcc() { - export CC=$highest_gcc - export CXX=$highest_gxx - cmake_args+=( - -DCMAKE_C_COMPILER=$CC - -DCMAKE_CXX_COMPILER=$CXX - ) -} - - - -# Check input -if [ -n "$1" ]; then - case "$1" in - -h|--help) - echo "Usage: $0 [options] [build_mode]" - echo "Options (one of)" - echo " -h, --help Show this help message and exit" - echo " -s, --safety Clang + 
fsanitize=$SAFETY_OPTIONS" - echo " -t, --thread Clang + fsanitize=$THREAD_OPTIONS" - echo " -n, --numerical Clang + fsanitize=$NUMERICAL_OPTIONS" - echo " -d, --dataflow Clang + fsanitize=$DATAFLOW_OPTIONS" - echo " -m --memory Clang + fsanitize=$MEMORY_OPTIONS" - echo " -C --cppcheck Clang + cppcheck" - echo " -f, --fanalyze GCC + -fanalyzer" - echo " -c, --clang Clang (alone)" - echo " -g, --gcc GCC (alone)" - echo " -- Don't do anything special" - echo "" - echo "Build Modes:" - echo " Debug (default)" - echo " Release" - echo " RelWithDebInfo" - echo "" - echo "(any possible others, depending on what cmake supports for" - echo "BUILD_TYPE out of the box)" - echo "" - echo "-------------------------------------------------" - echo "Diagnostic parameters" - echo "Highest gcc: $highest_gcc" - echo "Highest g++: $highest_gxx" - echo "Highest Clang: $highest_clang" - echo "Highest Clang++: $highest_clangxx" - exit 0 - ;; - -s|--safety) - cmake_args+=(-DSANITIZE_OPTIONS=$SAFETY_OPTIONS) - set_clang - ;; - -t|--thread) - cmake_args+=(-DSANITIZE_OPTIONS=$THREAD_OPTIONS) - set_clang - ;; - -n|--numerical) - cmake_args+=(-DSANITIZE_OPTIONS=$NUMERICAL_OPTIONS) - set_clang - ;; - -d|--dataflow) - cmake_args+=(-DSANITIZE_OPTIONS=$DATAFLOW_OPTIONS) - set_clang - ;; - -m|--memory) - cmake_args+=(-DSANITIZE_OPTIONS=$MEMORY_OPTIONS) - set_clang - ;; - -C|--cppcheck) - cmake_args+=(-DDO_CPPCHECK=ON) - set_clang - ;; - -f|--fanalyze) - cmake_args+=(-DDO_FANALYZE=ON) - set_gcc - ;; - -c|--clang) - set_clang - ;; - -g|--gcc) - set_gcc - ;; - --) - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -else - set_gcc -fi - -# If there are two arguments, override build mode -BUILD_MODE=${2:-Debug} -cmake_args+=(-DCMAKE_BUILD_TYPE=$BUILD_MODE) - -# Setup cmake stuff -BUILD_DIR="build" -mkdir -p $BUILD_DIR && cd $BUILD_DIR || { echo "Failed to create build directory"; exit 1; } - -# Run cmake -cmake "${cmake_args[@]}" .. || { echo "cmake failed"; exit 1; } -cmake --build . 
|| { echo "build failed"; exit 1; } -if [[ "$SANITIZE_OPTIONS" == "cppcheck" ]]; then - make cppcheck_run || { echo "cppcheck failed"; exit 1; } -fi -ctest --output-on-failure || { echo "tests failed!"; exit 1; } diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp index 254ca8a4124..647a9eb192a 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/interface.cpp @@ -40,73 +40,57 @@ ddup_prefork() // Configuration void -ddup_config_env(const char* dd_env) +ddup_config_env(std::string_view dd_env) { - if (dd_env) { - Datadog::UploaderBuilder::set_env(dd_env); - } + Datadog::UploaderBuilder::set_env(dd_env); } void -ddup_config_service(const char* service) +ddup_config_service(std::string_view service) { - if (service) { - Datadog::UploaderBuilder::set_service(service); - } + Datadog::UploaderBuilder::set_service(service); } void -ddup_config_version(const char* version) +ddup_config_version(std::string_view version) { - if (version) { - Datadog::UploaderBuilder::set_version(version); - } + Datadog::UploaderBuilder::set_version(version); } void -ddup_config_runtime(const char* runtime) +ddup_config_runtime(std::string_view runtime) { - if (runtime) { - Datadog::UploaderBuilder::set_runtime(runtime); - } + Datadog::UploaderBuilder::set_runtime(runtime); } void -ddup_config_runtime_version(const char* runtime_version) +ddup_config_runtime_version(std::string_view runtime_version) { - if (runtime_version) { - Datadog::UploaderBuilder::set_runtime_version(runtime_version); - } + Datadog::UploaderBuilder::set_runtime_version(runtime_version); } void -ddup_config_profiler_version(const char* profiler_version) +ddup_config_profiler_version(std::string_view profiler_version) { - if (profiler_version) { - Datadog::UploaderBuilder::set_profiler_version(profiler_version); - } + 
Datadog::UploaderBuilder::set_profiler_version(profiler_version); } void -ddup_config_url(const char* url) +ddup_config_url(std::string_view url) { - if (url) { - Datadog::UploaderBuilder::set_url(url); - } + Datadog::UploaderBuilder::set_url(url); } void -ddup_config_user_tag(const char* key, const char* val) +ddup_config_user_tag(std::string_view key, std::string_view val) { - if (key && val) { - Datadog::UploaderBuilder::set_tag(key, val); - } + Datadog::UploaderBuilder::set_tag(key, val); } void -ddup_set_runtime_id(const char* id, size_t sz) +ddup_set_runtime_id(std::string_view id) { - Datadog::UploaderBuilder::set_runtime_id(std::string_view(id, sz)); + Datadog::UploaderBuilder::set_runtime_id(id); } void @@ -193,21 +177,15 @@ ddup_push_heap(Datadog::Sample* sample, uint64_t size) } void -ddup_push_lock_name(Datadog::Sample* sample, const char* lock_name) +ddup_push_lock_name(Datadog::Sample* sample, std::string_view lock_name) { - if (lock_name) { - sample->push_lock_name(lock_name); - } + sample->push_lock_name(lock_name); } void -ddup_push_threadinfo(Datadog::Sample* sample, int64_t thread_id, int64_t thread_native_id, const char* thread_name) +ddup_push_threadinfo(Datadog::Sample* sample, int64_t thread_id, int64_t thread_native_id, std::string_view thread_name) { - if (thread_name) { - sample->push_threadinfo(thread_id, thread_native_id, thread_name); - } else { - sample->push_threadinfo(thread_id, thread_native_id, ""); - } + sample->push_threadinfo(thread_id, thread_native_id, thread_name); } void @@ -217,11 +195,9 @@ ddup_push_task_id(Datadog::Sample* sample, int64_t task_id) } void -ddup_push_task_name(Datadog::Sample* sample, const char* task_name) +ddup_push_task_name(Datadog::Sample* sample, std::string_view task_name) { - if (task_name) { - sample->push_task_name(task_name); - } + sample->push_task_name(task_name); } void @@ -237,43 +213,37 @@ ddup_push_local_root_span_id(Datadog::Sample* sample, int64_t local_root_span_id } void 
-ddup_push_trace_type(Datadog::Sample* sample, const char* trace_type) +ddup_push_trace_type(Datadog::Sample* sample, std::string_view trace_type) { - if (trace_type) { - sample->push_trace_type(trace_type); - } + sample->push_trace_type(trace_type); } void -ddup_push_trace_resource_container(Datadog::Sample* sample, const char* trace_resource_container) +ddup_push_trace_resource_container(Datadog::Sample* sample, std::string_view trace_resource_container) { - if (trace_resource_container) { - sample->push_trace_resource_container(trace_resource_container); - } + sample->push_trace_resource_container(trace_resource_container); } void -ddup_push_exceptioninfo(Datadog::Sample* sample, const char* exception_type, int64_t count) +ddup_push_exceptioninfo(Datadog::Sample* sample, std::string_view exception_type, int64_t count) { - if (exception_type) { - sample->push_exceptioninfo(exception_type, count); - } + sample->push_exceptioninfo(exception_type, count); } void -ddup_push_class_name(Datadog::Sample* sample, const char* class_name) +ddup_push_class_name(Datadog::Sample* sample, std::string_view class_name) { - if (class_name) { - sample->push_class_name(class_name); - } + sample->push_class_name(class_name); } void -ddup_push_frame(Datadog::Sample* sample, const char* _name, const char* _filename, uint64_t address, int64_t line) +ddup_push_frame(Datadog::Sample* sample, + std::string_view _name, + std::string_view _filename, + uint64_t address, + int64_t line) { - if (_name && _filename) { - sample->push_frame(_name, _filename, address, line); - } + sample->push_frame(_name, _filename, address, line); } void diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp index f2051eb1516..40f5792696b 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample.cpp @@ -16,10 +16,6 @@ Sample::Sample(SampleType 
_type_mask, unsigned int _max_nframes) locations.reserve(max_nframes + 1); // +1 for a "truncated frames" virtual frame } -void -Sample::start_sample() -{} - void Sample::profile_clear_state() { @@ -54,8 +50,9 @@ Sample::push_frame(std::string_view name, std::string_view filename, uint64_t ad if (locations.size() <= max_nframes) { push_frame_impl(name, filename, address, line); + } else { + ++dropped_frames; } - ++dropped_frames; } bool diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp index 6841368b75b..87fbf25d8e1 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/sample_manager.cpp @@ -27,9 +27,7 @@ SampleManager::set_max_nframes(unsigned int _max_nframes) Sample* SampleManager::start_sample() { - auto sample = new Sample(type_mask, max_nframes); - sample->start_sample(); - return sample; + return new Sample(type_mask, max_nframes); } void diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp b/ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp index d5affa66df4..8c8496e7a40 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/src/uploader_builder.cpp @@ -114,9 +114,11 @@ UploaderBuilder::build() }; for (const auto& [tag, data] : tag_data) { - std::string errmsg; - if (!add_tag(tags, tag, data, errmsg)) { - reasons.push_back(std::string(to_string(tag)) + ": " + errmsg); + if (!data.empty()) { + std::string errmsg; + if (!add_tag(tags, tag, data, errmsg)) { + reasons.push_back(std::string(to_string(tag)) + ": " + errmsg); + } } } @@ -131,7 +133,7 @@ UploaderBuilder::build() // If any mistakes were made, report on them now and throw if (!reasons.empty()) { ddog_Vec_Tag_drop(tags); - throw std::runtime_error("Error initializing exporter: missing or 
bad configuration: " + join(reasons, ", ")); + throw std::runtime_error("Error initializing exporter, missing or bad configuration: " + join(reasons, ", ")); } // If we're here, the tags are good, so we can initialize the exporter diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt index a2690c28c53..6163229a5df 100644 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/dd_wrapper/test/CMakeLists.txt @@ -8,6 +8,7 @@ set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) FetchContent_MakeAvailable(googletest) include(GoogleTest) +include(AnalysisFunc) function(dd_wrapper_add_test name) add_executable(${name} ${ARGN}) @@ -18,23 +19,11 @@ function(dd_wrapper_add_test name) gtest_main dd_wrapper ) + add_ddup_config(dd_wrapper) - if (SANITIZE_OPTIONS) - if(NOT (SANITIZE_OPTIONS MATCHES "fanalyzer" OR SANITIZE_OPTIONS MATCHES "cppcheck")) - target_compile_options(${name} PRIVATE -fsanitize=${SANITIZE_OPTIONS}) - target_link_options(${name} PRIVATE -fsanitize=${SANITIZE_OPTIONS}) - endif() - endif() gtest_discover_tests(${name}) endfunction() -# Shouldn't be necessary, but whatever -add_compile_options( - "$<$:-Og;-ggdb3;-fno-omit-frame-pointer>" - "$<$:-O3>" - -Wall -Werror -Wextra -Wshadow -Wnon-virtual-dtor -Wold-style-cast -) - # Add the tests dd_wrapper_add_test(initialization initialization.cpp diff --git a/ddtrace/internal/datadog/profiling/CMakeLists.txt b/ddtrace/internal/datadog/profiling/ddup/CMakeLists.txt similarity index 53% rename from ddtrace/internal/datadog/profiling/CMakeLists.txt rename to ddtrace/internal/datadog/profiling/ddup/CMakeLists.txt index 0570b9ab471..9a59d9344a0 100644 --- a/ddtrace/internal/datadog/profiling/CMakeLists.txt +++ b/ddtrace/internal/datadog/profiling/ddup/CMakeLists.txt @@ -5,44 +5,59 @@ cmake_minimum_required(VERSION 3.19) # setup.py 
to build the wheel. Otherwise, we have to propagate a lot of state all around. # Thus, we use the same name as the Python package (i.e., the caller sets EXTENSION_NAME) # ddup is used by a default for standalone/test builds. -set(EXTENSION_NAME "ddup" CACHE STRING "Name of the extension") +set(EXTENSION_NAME "_ddup" CACHE STRING "Name of the extension") project(${EXTENSION_NAME}) +message(STATUS "Building extension: ${EXTENSION_NAME}") + +# Get the cmake modules for this project +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/../cmake") # Includes include(FetchContent) include(ExternalProject) -include(FindPython3) +include(FindLibdatadog) -# Build the dd_wrapper -# Need to specify that we're calling it via add_subdirectory, so that it knows to -# propagate the right variables to its parent scope -set(DD_WRAPPER_IS_SUBPROJECT ON) -add_subdirectory(dd_wrapper "${LIB_INSTALL_DIR}") +# Technically, this should be its own project which we `include()`, but I don't +# want to deal with that when so many things may yet be factored differently. 
+add_subdirectory(../dd_wrapper ${CMAKE_CURRENT_BINARY_DIR}/../dd_wrapper_build) -# Find the Python interpreter -find_package(Python3 COMPONENTS Interpreter REQUIRED) +# Make sure we have necessary Python variables if (NOT Python3_INCLUDE_DIRS) message(FATAL_ERROR "Python3_INCLUDE_DIRS not found") endif() # This sets some parameters for the target build, which can only be defined by setup.py -set (ENV{PY_MAJOR_VERSION} ${PY_MAJOR_VERSION}) -set (ENV{PY_MINOR_VERSION} ${PY_MINOR_VERSION}) -set (ENV{PY_MICRO_VERSION} ${PY_MICRO_VERSION}) +set(ENV{PY_MAJOR_VERSION} ${PY_MAJOR_VERSION}) +set(ENV{PY_MINOR_VERSION} ${PY_MINOR_VERSION}) +set(ENV{PY_MICRO_VERSION} ${PY_MICRO_VERSION}) # Cythonize the .pyx file +set(DDUP_CPP_SRC ${CMAKE_CURRENT_BINARY_DIR}/_ddup.cpp) add_custom_command( - OUTPUT ${CMAKE_CURRENT_LIST_DIR}/_ddup.cpp - COMMAND ${PYTHON_EXECUTABLE} -m cython ${CMAKE_CURRENT_LIST_DIR}/_ddup.pyx -o ${CMAKE_CURRENT_LIST_DIR}/_ddup.cpp + OUTPUT ${DDUP_CPP_SRC} + COMMAND ${PYTHON_EXECUTABLE} -m cython ${CMAKE_CURRENT_LIST_DIR}/_ddup.pyx -o ${DDUP_CPP_SRC} DEPENDS ${CMAKE_CURRENT_LIST_DIR}/_ddup.pyx ) # Specify the target C-extension that we want to build add_library(${EXTENSION_NAME} SHARED - _ddup.cpp + ${DDUP_CPP_SRC} +) + +# We can't add common Profiling configuration because cython generates messy code, so we just setup some +# basic flags and features +target_compile_options(${EXTENSION_NAME} PRIVATE + "$<$:-Og;-ggdb3>" + "$<$:-Os>" + -ffunction-sections -fno-semantic-interposition +) +target_link_options(${EXTENSION_NAME} PRIVATE + "$<$:-s>" + -Wl,--as-needed -Wl,-Bsymbolic-functions -Wl,--gc-sections ) +set_property(TARGET ${EXTENSION_NAME} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) -target_compile_features(dd_wrapper PUBLIC cxx_std_17) +target_compile_features(${EXTENSION_NAME} PUBLIC cxx_std_17) # cmake may mutate the name of the library (e.g., lib- and -.so for dynamic libraries). 
# This suppresses that behavior, which is required to ensure all paths can be inferred @@ -52,11 +67,9 @@ set_target_properties(${EXTENSION_NAME} PROPERTIES SUFFIX "") # RPATH is needed for sofile discovery at runtime, since Python packages are not # installed in the system path. This is typical. -set_target_properties(${EXTENSION_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN") - +set_target_properties(${EXTENSION_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/..") target_include_directories(${EXTENSION_NAME} PRIVATE - include - dd_wrapper/include + ../dd_wrapper/include ${Datadog_INCLUDE_DIRS} ${Python3_INCLUDE_DIRS} ) @@ -69,9 +82,10 @@ target_link_libraries(${EXTENSION_NAME} PRIVATE set_target_properties(${EXTENSION_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) # Set the output directory for the built library -set_target_properties(${EXTENSION_NAME} - PROPERTIES - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}" -) - -install(TARGETS ${EXTENSION_NAME} DESTINATION ${LIB_INSTALL_DIR}) +if (LIB_INSTALL_DIR) + install(TARGETS ${EXTENSION_NAME} + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${LIB_INSTALL_DIR} + ) +endif() diff --git a/ddtrace/internal/datadog/profiling/ddup.py b/ddtrace/internal/datadog/profiling/ddup/__init__.py similarity index 92% rename from ddtrace/internal/datadog/profiling/ddup.py rename to ddtrace/internal/datadog/profiling/ddup/__init__.py index 8f5ad4fe0b0..32bd273c5a4 100644 --- a/ddtrace/internal/datadog/profiling/ddup.py +++ b/ddtrace/internal/datadog/profiling/ddup/__init__.py @@ -3,11 +3,19 @@ try: from ._ddup import * # noqa: F403, F401 -except Exception: + + is_available = True + +except Exception as e: from typing import Dict # noqa:F401 from typing import Optional # noqa:F401 - from ddtrace._trace.span import Span # noqa:F401 + from ddtrace.internal.logger import get_logger + + LOG = get_logger(__name__) + LOG.debug("Failed to import _ddup: %s", e) + + is_available = False # 
Decorator for not-implemented def not_implemented(func): diff --git a/ddtrace/internal/datadog/profiling/_ddup.pyi b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyi similarity index 100% rename from ddtrace/internal/datadog/profiling/_ddup.pyi rename to ddtrace/internal/datadog/profiling/ddup/_ddup.pyi diff --git a/ddtrace/internal/datadog/profiling/_ddup.pyx b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx similarity index 50% rename from ddtrace/internal/datadog/profiling/_ddup.pyx rename to ddtrace/internal/datadog/profiling/ddup/_ddup.pyx index 20ad71d91e7..94521a888e4 100644 --- a/ddtrace/internal/datadog/profiling/_ddup.pyx +++ b/ddtrace/internal/datadog/profiling/ddup/_ddup.pyx @@ -1,3 +1,6 @@ +# distutils: language = c++ +# cython: language_level=3 + import platform import typing from typing import Optional @@ -13,16 +16,16 @@ from .utils import sanitize_string IF UNAME_SYSNAME == "Linux": - cdef extern from "types.hpp": - ctypedef enum ProfileType "ProfileType": - CPU "ProfileType::CPU" - Wall "ProfileType::Wall" - Exception "ProfileType::Exception" - LockAcquire "ProfileType::LockAcquire" - LockRelease "ProfileType::LockRelease" - Allocation "ProfileType::Allocation" - Heap "ProfileType::Heap" - All "ProfileType::All" + def ensure_binary_or_empty(s) -> bytes: + try: + return ensure_binary(s) + except Exception: + pass + return b"" + + cdef extern from "" namespace "std" nogil: + cdef cppclass string_view: + string_view(const char* s, size_t count) cdef extern from "sample.hpp" namespace "Datadog": ctypedef struct Sample: @@ -31,16 +34,16 @@ IF UNAME_SYSNAME == "Linux": cdef extern from "interface.hpp": ctypedef signed int int64_t ctypedef unsigned int uint64_t - void ddup_config_env(const char *env) - void ddup_config_service(const char *service) - void ddup_config_version(const char *version) - void ddup_config_runtime(const char *runtime) - void ddup_config_runtime_version(const char *runtime_version) - void ddup_config_profiler_version(const char 
*profiler_version) - void ddup_config_url(const char *url) + void ddup_config_env(string_view env) + void ddup_config_service(string_view service) + void ddup_config_version(string_view version) + void ddup_config_runtime(string_view runtime) + void ddup_config_runtime_version(string_view runtime_version) + void ddup_config_profiler_version(string_view profiler_version) + void ddup_config_url(string_view url) void ddup_config_max_nframes(int max_nframes) - void ddup_config_user_tag(const char *key, const char *val) + void ddup_config_user_tag(string_view key, string_view val) void ddup_config_sample_type(unsigned int type) void ddup_init() @@ -52,22 +55,47 @@ IF UNAME_SYSNAME == "Linux": void ddup_push_release(Sample *sample, int64_t release_time, int64_t count) void ddup_push_alloc(Sample *sample, uint64_t size, uint64_t count) void ddup_push_heap(Sample *sample, uint64_t size) - void ddup_push_lock_name(Sample *sample, const char *lock_name) - void ddup_push_threadinfo(Sample *sample, int64_t thread_id, int64_t thread_native_id, const char *thread_name) + void ddup_push_lock_name(Sample *sample, string_view lock_name) + void ddup_push_threadinfo(Sample *sample, int64_t thread_id, int64_t thread_native_id, string_view thread_name) void ddup_push_task_id(Sample *sample, int64_t task_id) - void ddup_push_task_name(Sample *sample, const char *task_name) + void ddup_push_task_name(Sample *sample, string_view task_name) void ddup_push_span_id(Sample *sample, uint64_t span_id) void ddup_push_local_root_span_id(Sample *sample, uint64_t local_root_span_id) - void ddup_push_trace_type(Sample *sample, const char *trace_type) - void ddup_push_trace_resource_container(Sample *sample, const char *trace_resource_container) - void ddup_push_exceptioninfo(Sample *sample, const char *exception_type, int64_t count) - void ddup_push_class_name(Sample *sample, const char *class_name) - void ddup_push_frame(Sample *sample, const char *_name, const char *_filename, uint64_t address, 
int64_t line) + void ddup_push_trace_type(Sample *sample, string_view trace_type) + void ddup_push_trace_resource_container(Sample *sample, string_view trace_resource_container) + void ddup_push_exceptioninfo(Sample *sample, string_view exception_type, int64_t count) + void ddup_push_class_name(Sample *sample, string_view class_name) + void ddup_push_frame(Sample *sample, string_view _name, string_view _filename, uint64_t address, int64_t line) void ddup_flush_sample(Sample *sample) void ddup_drop_sample(Sample *sample) - void ddup_set_runtime_id(const char *_id, size_t sz) + void ddup_set_runtime_id(string_view _id) bint ddup_upload() nogil + # Create wrappers for cython + cdef call_ddup_config_service(bytes service): + ddup_config_service(string_view(service, len(service))) + + cdef call_ddup_config_env(bytes env): + ddup_config_env(string_view(env, len(env))) + + cdef call_ddup_config_version(bytes version): + ddup_config_version(string_view(version, len(version))) + + cdef call_ddup_config_url(bytes url): + ddup_config_url(string_view(url, len(url))) + + cdef call_ddup_config_runtime(bytes runtime): + ddup_config_runtime(string_view(runtime, len(runtime))) + + cdef call_ddup_config_runtime_version(bytes runtime_version): + ddup_config_runtime_version(string_view(runtime_version, len(runtime_version))) + + cdef call_ddup_config_profiler_version(bytes profiler_version): + ddup_config_profiler_version(string_view(profiler_version, len(profiler_version))) + + cdef call_ddup_config_user_tag(bytes key, bytes val): + ddup_config_user_tag(string_view(key, len(key)), string_view(val, len(val))) + def init( service: Optional[str], env: Optional[str], @@ -78,31 +106,31 @@ IF UNAME_SYSNAME == "Linux": # Try to provide a ddtrace-specific default service if one is not given service = service or DEFAULT_SERVICE_NAME - ddup_config_service(ensure_binary(service)) + call_ddup_config_service(ensure_binary_or_empty(service)) # If otherwise no values are provided, the uploader will 
omit the fields # and they will be auto-populated in the backend if env: - ddup_config_env(ensure_binary(env)) + call_ddup_config_env(ensure_binary_or_empty(env)) if version: - ddup_config_version(ensure_binary(version)) + call_ddup_config_version(ensure_binary_or_empty(version)) if url: - ddup_config_url(ensure_binary(url)) + call_ddup_config_url(ensure_binary_or_empty(url)) # Inherited - ddup_config_runtime(ensure_binary(platform.python_implementation())) - ddup_config_runtime_version(ensure_binary(platform.python_version())) - ddup_config_profiler_version(ensure_binary(ddtrace.__version__)) - ddup_config_max_nframes(max_nframes) + call_ddup_config_runtime(ensure_binary_or_empty(platform.python_implementation())) + call_ddup_config_runtime_version(ensure_binary_or_empty(platform.python_version())) + call_ddup_config_profiler_version(ensure_binary_or_empty(ddtrace.__version__)) + ddup_config_max_nframes(max_nframes) # call_* only needed for string-type args if tags is not None: for key, val in tags.items(): if key and val: - ddup_config_user_tag(ensure_binary(key), ensure_binary(val)) + call_ddup_config_user_tag(ensure_binary_or_empty(key), ensure_binary_or_empty(val)) ddup_init() def upload() -> None: - runtime_id = ensure_binary(runtime.get_runtime_id()) - ddup_set_runtime_id(runtime_id, len(runtime_id)) + runtime_id = ensure_binary_or_empty(runtime.get_runtime_id()) + ddup_set_runtime_id(string_view(runtime_id, len(runtime_id))) with nogil: ddup_upload() @@ -143,20 +171,33 @@ IF UNAME_SYSNAME == "Linux": def push_lock_name(self, lock_name: str) -> None: if self.ptr is not NULL: - ddup_push_lock_name(self.ptr, ensure_binary(lock_name)) + lock_name_bytes = ensure_binary_or_empty(lock_name) + ddup_push_lock_name(self.ptr, string_view(lock_name_bytes, len(lock_name_bytes))) def push_frame(self, name: str, filename: str, int address, int line) -> None: if self.ptr is not NULL: - name = sanitize_string(name) - filename = sanitize_string(filename) - 
ddup_push_frame(self.ptr, ensure_binary(name), ensure_binary(filename), address, line) + # Customers report `name` and `filename` may be unexpected objects, so sanitize. + name_bytes = ensure_binary_or_empty(sanitize_string(name)) + filename_bytes = ensure_binary_or_empty(sanitize_string(filename)) + ddup_push_frame( + self.ptr, + string_view(name_bytes, len(name_bytes)), + string_view(filename_bytes, len(filename_bytes)), + address, + line + ) def push_threadinfo(self, thread_id: int, thread_native_id: int, thread_name: str) -> None: if self.ptr is not NULL: thread_id = thread_id if thread_id is not None else 0 thread_native_id = thread_native_id if thread_native_id is not None else 0 - thread_name = thread_name if thread_name is not None else "" - ddup_push_threadinfo(self.ptr, thread_id, thread_native_id, ensure_binary(thread_name)) + thread_name_bytes = ensure_binary_or_empty(thread_name) + ddup_push_threadinfo( + self.ptr, + thread_id, + thread_native_id, + string_view(thread_name_bytes, len(thread_name_bytes)) + ) def push_task_id(self, task_id: int) -> None: if self.ptr is not NULL: @@ -165,18 +206,19 @@ IF UNAME_SYSNAME == "Linux": def push_task_name(self, task_name: str) -> None: if self.ptr is not NULL: if task_name: - ddup_push_task_name(self.ptr, ensure_binary(task_name)) + task_name_bytes = ensure_binary_or_empty(task_name) + ddup_push_task_name(self.ptr, string_view(task_name_bytes, len(task_name_bytes))) def push_exceptioninfo(self, exc_type: type, count: int) -> None: if self.ptr is not NULL: if exc_type is not None: - exc_name = exc_type.__module__ + "." + exc_type.__name__ - ddup_push_exceptioninfo(self.ptr, ensure_binary(exc_name), count) + exc_name = ensure_binary_or_empty(exc_type.__module__ + "." 
+ exc_type.__name__) + ddup_push_exceptioninfo(self.ptr, string_view(exc_name, len(exc_name)), count) def push_class_name(self, class_name: str) -> None: if self.ptr is not NULL: - class_name = class_name if class_name is not None else "" - ddup_push_class_name(self.ptr, ensure_binary(class_name)) + class_name_bytes = ensure_binary_or_empty(class_name) + ddup_push_class_name(self.ptr, string_view(class_name_bytes, len(class_name_bytes))) def push_span(self, span: typing.Optional[Span], endpoint_collection_enabled: bool) -> None: if self.ptr is NULL: @@ -190,9 +232,14 @@ IF UNAME_SYSNAME == "Linux": if span._local_root.span_id: ddup_push_local_root_span_id(self.ptr, span._local_root.span_id) if span._local_root.span_type: - ddup_push_trace_type(self.ptr, span._local_root.span_type) + span_type_bytes = ensure_binary_or_empty(span._local_root.span_type) + ddup_push_trace_type(self.ptr, string_view(span_type_bytes, len(span_type_bytes))) if endpoint_collection_enabled: - ddup_push_trace_resource_container(self.ptr, span._local_root._resource) + root_service_bytes = ensure_binary_or_empty(span._local_root.service) + ddup_push_trace_resource_container( + self.ptr, + string_view(root_service_bytes, len(root_service_bytes)) + ) def flush_sample(self) -> None: # Flushing the sample consumes it. 
The user will no longer be able to use diff --git a/ddtrace/internal/datadog/profiling/utils.py b/ddtrace/internal/datadog/profiling/ddup/utils.py similarity index 100% rename from ddtrace/internal/datadog/profiling/utils.py rename to ddtrace/internal/datadog/profiling/ddup/utils.py diff --git a/ddtrace/internal/datadog/profiling/dd_wrapper/runner.sh b/ddtrace/internal/datadog/profiling/runner.sh similarity index 96% rename from ddtrace/internal/datadog/profiling/dd_wrapper/runner.sh rename to ddtrace/internal/datadog/profiling/runner.sh index b5636a05acd..e2afdec2443 100755 --- a/ddtrace/internal/datadog/profiling/dd_wrapper/runner.sh +++ b/ddtrace/internal/datadog/profiling/runner.sh @@ -1,4 +1,6 @@ #!/bin/bash +set -euox pipefail + ./setup_custom.sh -C || { echo "Failed cppcheck"; exit 1; } ./setup_custom.sh -s || { echo "Failed safety tests"; exit 1; } ./setup_custom.sh -f || { echo "Failed -fanalyzer"; exit 1; } diff --git a/ddtrace/internal/datadog/profiling/setup_custom.sh b/ddtrace/internal/datadog/profiling/setup_custom.sh new file mode 100755 index 00000000000..0c41aed8517 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/setup_custom.sh @@ -0,0 +1,284 @@ +#!/bin/bash +set -euox pipefail + +### Useful globals +MY_DIR=$(dirname $(realpath $0)) +MY_NAME="$0" +BUILD_DIR="build" + +### Compiler discovery +# Initialize variables to store the highest versions +highest_gcc="" +highest_gxx="" +highest_clang="" +highest_clangxx="" + +# Function to find the highest version of compilers +find_highest_compiler_version() { + local base_name=$1 + local highest_var_name=$2 + local highest_version=0 + + # Try to find the latest versions of both GCC and Clang + # The range 5-20 is arbitrary (GCC 5 was released in 2015, and 20 is just a + # a high number since Clang is on version 17) + # TODO provide a passthrough in CI where we want to use a pinned or specified + # version. Not a big deal since this script is currently only used for local + # development. 
+ for version in {20..5}; do + if command -v "${base_name}-${version}" &> /dev/null; then + if [ $version -gt $highest_version ]; then + highest_version=$version + eval "$highest_var_name=${base_name}-${version}" + fi + fi + done + + # Check for the base version if no numbered version was found + if [ $highest_version -eq 0 ] && command -v "$base_name" &> /dev/null; then + eval "$highest_var_name=$base_name" + fi +} + + # Find highest versions for each compiler +find_highest_compiler_version gcc highest_gcc +find_highest_compiler_version g++ highest_gxx +find_highest_compiler_version clang highest_clang +find_highest_compiler_version clang++ highest_clangxx + +### Build setup +# Targets to target dirs +declare -A target_dirs +target_dirs["ddup"]="ddup" +target_dirs["stack_v2"]="stack_v2" +target_dirs["dd_wrapper"]="dd_wrapper" + +# Compiler options +declare -A compiler_args +compiler_args["safety"]="-DSANITIZE_OPTIONS=address,leak,undefined" +compiler_args["thread"]="-DSANITIZE_OPTIONS=thread" +compiler_args["numerical"]="-DSANITIZE_OPTIONS=integer,nullability,signed-integer-overflow,bounds,float-divide-by-zero" +compiler_args["dataflow"]="-DSANITIZE_OPTIONS=dataflow" +compiler_args["memory"]="-DSANITIZE_OPTIONS=memory" +compiler_args["fanalyzer"]="-DDO_FANALYZER=ON" +compiler_args["cppcheck"]="-DDO_CPPCHECK=ON" + +# Initial cmake args +cmake_args=( + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + -DCMAKE_VERBOSE_MAKEFILE=ON + -DLIB_INSTALL_DIR=$(realpath $MY_DIR)/lib + -DPython3_INCLUDE_DIRS=$(python3 -c "from distutils.sysconfig import get_python_inc; print(get_python_inc())") + -DPython3_EXECUTABLE=$(which python3) + -DPython3_LIBRARY=$(python3 -c "import distutils.sysconfig as sysconfig; print(sysconfig.get_config_var('LIBDIR'))") +) + +# Initial build targets; no matter what, dd_wrapper is the base dependency, so it's always built +targets=("dd_wrapper") + +# Helper functions for finding the compiler(s) +set_clang() { + export CC=$highest_clang + export
CXX=$highest_clangxx + cmake_args+=( + -DCMAKE_C_COMPILER=$CC + -DCMAKE_CXX_COMPILER=$CXX + ) +} + +set_gcc() { + export CC=$highest_gcc + export CXX=$highest_gxx + cmake_args+=( + -DCMAKE_C_COMPILER=$CC + -DCMAKE_CXX_COMPILER=$CXX + ) +} + +### Build runners +run_cmake() { + target=$1 + dir=${target_dirs[$target]} + if [ -z "$dir" ]; then + echo "No directory specified for cmake" + exit 1 + fi + + # Enter the directory and create the build directory + pushd $dir || { echo "Failed to change to $dir"; exit 1; } + mkdir -p $BUILD_DIR && cd $BUILD_DIR || { echo "Failed to create build directory for $dir"; exit 1; } + + # Run cmake + cmake "${cmake_args[@]}" .. || { echo "cmake failed"; exit 1; } + cmake --build . || { echo "build failed"; exit 1; } + if [[ " ${cmake_args[*]} " =~ " -DDO_CPPCHECK=ON " ]]; then + echo "--------------------------------------------------------------------- Running CPPCHECK" + make cppcheck || { echo "cppcheck failed"; exit 1; } + fi + if [[ " ${cmake_args[*]} " =~ " -DBUILD_TESTING=ON " ]]; then + echo "--------------------------------------------------------------------- Running Tests" +# make test || { echo "tests failed"; exit 1; } + ctest --output-on-failure || { echo "tests failed!"; exit 1; } + fi + + # OK, the build or whatever went fine I guess. 
+ popd +} + +### Print help +print_help() { + echo "Usage: ${MY_NAME} [options] [build_mode] [target]" + echo "Options (one of)" + echo " -h, --help Show this help message and exit" + echo " -s, --safety Clang + " ${compiler_args["safety"]} + echo " -t, --thread Clang + " ${compiler_args["thread"]} + echo " -n, --numerical Clang + " ${compiler_args["numerical"]} + echo " -d, --dataflow Clang + " ${compiler_args["dataflow"]} + echo " -m --memory Clang + " ${compiler_args["memory"]} + echo " -C --cppcheck Clang + " ${compiler_args["cppcheck"]} + echo " -f, --fanalyze GCC + " ${compiler_args["fanalyzer"]} + echo " -c, --clang Clang (alone)" + echo " -g, --gcc GCC (alone)" + echo " -- Don't do anything special" + echo "" + echo "Build Modes:" + echo " Debug (default)" + echo " Release" + echo " RelWithDebInfo" + echo "" + echo "(any possible others, depending on what cmake supports for" + echo "BUILD_TYPE out of the box)" + echo "" + echo "Targets:" + echo " all" + echo " all_test (default)" + echo " dd_wrapper" + echo " dd_wrapper_test" + echo " stack_v2 (also builds dd_wrapper)" + echo " stack_v2_test (also builds dd_wrapper_test)" + echo " ddup (also builds dd_wrapper)" + echo " ddup_test (also builds dd_wrapper_test)" +} + +print_cmake_args() { + echo "CMake Args: ${cmake_args[*]}" + echo "Targets: ${targets[*]}" +} + +### Check input +# Check the first slot, options +add_compiler_args() { + case "$1" in + -h|--help) + print_help + exit 0 + ;; + -s|--safety) + cmake_args+=(${compiler_args["safety"]}) + set_clang + ;; + -t|--thread) + cmake_args+=(${compiler_args["thread"]}) + set_clang + ;; + -n|--numerical) + cmake_args+=(${compiler_args["numerical"]}) + set_clang + ;; + -d|--dataflow) + cmake_args+=(${compiler_args["dataflow"]}) + set_clang + ;; + -m|--memory) + cmake_args+=(${compiler_args["memory"]}) + set_clang + ;; + -C|--cppcheck) + cmake_args+=(${compiler_args["cppcheck"]}) + set_clang + ;; + -f|--fanalyze) + cmake_args+=(${compiler_args["fanalyzer"]}) + set_gcc + 
;; + -c|--clang) + set_clang + ;; + -g|--gcc) + set_gcc + ;; + --) + set_gcc # Default to GCC, since this is what will happen in the official build + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +} + +# Check the second slot, build mode +add_build_mode() { + case "$1" in + Debug|Release|RelWithDebInfo) + cmake_args+=(-DCMAKE_BUILD_TYPE=$1) + ;; + ""|--) + cmake_args+=(-DCMAKE_BUILD_TYPE=Debug) + ;; + *) + echo "Unknown build mode: $1" + exit 1 + ;; + esac +} + +# Check the third slot, target +add_target() { + arg=${1:-"all_test"} + if [[ "${arg}" =~ _test$ ]]; then + cmake_args+=(-DBUILD_TESTING=ON) + fi + target=${arg%_test} + + case "${target}" in + all|--) + targets+=("stack_v2") + targets+=("ddup") + ;; + dd_wrapper) + # We always build dd_wrapper, so no need to add it to the list + ;; + stack_v2) + targets+=("stack_v2") + ;; + ddup) + targets+=("ddup") + ;; + *) + echo "Unknown target: $1" + exit 1 + ;; + esac +} + + +### ENTRYPOINT +# Check for basic input validity +if [ $# -eq 0 ]; then + echo "No arguments given. At least one is needed, otherwise I'd (a m b i g u o u s l y) do a lot of work!" + print_help + exit 1 +fi + +# Build mode and target are optional; default them to empty under `set -u` +add_compiler_args "$1" +add_build_mode "${2:-}" +add_target "${3:-}" + +# Print cmake args +print_cmake_args + +# Run cmake +for target in "${targets[@]}"; do + run_cmake $target +done diff --git a/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt new file mode 100644 index 00000000000..2a021fb49b8 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/CMakeLists.txt @@ -0,0 +1,106 @@ +cmake_minimum_required(VERSION 3.19) + +# The exact name of this extension determines aspects of the installation and build paths, +# which need to be kept in sync with setup.py. Accordingly, take the name passed in by the +# caller, defaulting to "stack_v2" if needed. 
+set(EXTENSION_NAME "_stack_v2" CACHE STRING "Name of the extension") +project(${EXTENSION_NAME}) +message(STATUS "Building extension: ${EXTENSION_NAME}") + +# Custom cmake modules are in the parent directory +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../cmake") + +# Includes +include(FetchContent) +include(ExternalProject) +include(AnalysisFunc) +include(FindCppcheck) + +# dd_wrapper should be its own project at one point, if the current design is kept, +# but whether or not we keep that design is unknown. Hack it for now. +add_subdirectory(../dd_wrapper ${CMAKE_CURRENT_BINARY_DIR}/../dd_wrapper_build) + +# Make sure we have necessary Python variables +if (NOT Python3_INCLUDE_DIRS) + message(FATAL_ERROR "Python3_INCLUDE_DIRS not found") +endif() + +# Add echion +FetchContent_Declare( + echion + GIT_REPOSITORY "https://github.com/sanchda/echion.git" + GIT_TAG "sanchda/more_rendering" +) +FetchContent_GetProperties(echion) +if(NOT echion_POPULATED) + FetchContent_Populate(echion) +endif() + +# Specify the target C-extension that we want to build +add_library(${EXTENSION_NAME} SHARED + src/sampler.cpp + src/stack_renderer.cpp + src/stack_v2.cpp +) + +# Add common config +add_ddup_config(${EXTENSION_NAME}) +add_cppcheck_target(${EXTENSION_NAME} + DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include + SRC ${CMAKE_CURRENT_SOURCE_DIR}/src +) + +# This project is build with C++17, even though the underlying repo adheres to the manylinux 2014 standard. +# This isn't currently a problem, but if it becomes one, we may have to structure the library differently. +target_compile_features(${EXTENSION_NAME} PUBLIC cxx_std_17) + +# Never build with native unwinding, since this is not currently used +target_compile_definitions(${EXTENSION_NAME} PRIVATE UNWIND_NATIVE_DISABLE) + +# Includes; echion and python are marked "system" to suppress warnings, +# but note in MSVC we'll have to #pragma warning(push, 0 then pop for the same effect. 
+target_include_directories(${EXTENSION_NAME} PRIVATE + .. # include dd_wrapper from the root in order to make its paths transparent in the code + include +) +target_include_directories(${EXTENSION_NAME} SYSTEM PRIVATE + ${echion_SOURCE_DIR} + ${Python3_INCLUDE_DIRS} + include/vendored + include/util +) + +# Echion sources need to be given the current platform +if(WIN32) + target_compile_definitions(${EXTENSION_NAME} PRIVATE PL_WINDOWS) +elseif(APPLE) + target_compile_definitions(${EXTENSION_NAME} PRIVATE PL_DARWIN) +elseif(UNIX) + target_compile_definitions(${EXTENSION_NAME} PRIVATE PL_LINUX) +endif() + +# cmake may mutate the name of the library (e.g., lib- and -.so for dynamic libraries). +# This suppresses that behavior, which is required to ensure all paths can be inferred +# correctly by setup.py. +set_target_properties(${EXTENSION_NAME} PROPERTIES PREFIX "") +set_target_properties(${EXTENSION_NAME} PROPERTIES SUFFIX "") + +# RPATH is needed for sofile discovery at runtime, since Python packages are not +# installed in the system path. This is typical. +set_target_properties(${EXTENSION_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/..") +target_link_libraries(${EXTENSION_NAME} PRIVATE + dd_wrapper +) + +# Extensions are built as dynamic libraries, so PIC is required. 
+set_target_properties(${EXTENSION_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) + +# Set the output directory for the built library +if (LIB_INSTALL_DIR) + install(TARGETS ${EXTENSION_NAME} + LIBRARY DESTINATION ${LIB_INSTALL_DIR} + ARCHIVE DESTINATION ${LIB_INSTALL_DIR} + RUNTIME DESTINATION ${LIB_INSTALL_DIR} + ) +endif() diff --git a/ddtrace/internal/datadog/profiling/stack_v2/__init__.py b/ddtrace/internal/datadog/profiling/stack_v2/__init__.py new file mode 100644 index 00000000000..040035082b3 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/__init__.py @@ -0,0 +1,29 @@ +try: + from ._stack_v2 import * # noqa: F401, F403 + + is_available = True + +except Exception as e: + from ddtrace.internal.logger import get_logger + + LOG = get_logger(__name__) + LOG.debug("Failed to import _stack_v2: %s", e) + + is_available = False + + # Decorator for not-implemented + def not_implemented(func): + def wrapper(*args, **kwargs): + raise NotImplementedError("{} is not implemented on this platform".format(func.__name__)) + + @not_implemented + def start(*args, **kwargs): + pass + + @not_implemented + def stop(*args, **kwargs): + pass + + @not_implemented + def set_interval(*args, **kwargs): + pass diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/constants.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/constants.hpp new file mode 100644 index 00000000000..1dd51a8e87d --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/constants.hpp @@ -0,0 +1,8 @@ +#include "dd_wrapper/include/constants.hpp" + +// Default sampling frequency in microseconds. This will almost certainly be overridden by dynamic sampling. +constexpr unsigned int g_default_sampling_period_us = 10000; // 100 Hz +constexpr double g_default_sampling_period_s = g_default_sampling_period_us / 1e6; + +// Echion maintains a cache of frames--the size of this cache is specified up-front. 
+constexpr unsigned int g_default_echion_frame_cache_size = 1024; diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/python_headers.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/python_headers.hpp new file mode 100644 index 00000000000..d7d6398c3b4 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/python_headers.hpp @@ -0,0 +1,7 @@ +#define PY_SSIZE_T_CLEAN +#include + +#if PY_VERSION_HEX >= 0x030c0000 +// https://github.com/python/cpython/issues/108216#issuecomment-1696565797 +#undef _PyGC_FINALIZED +#endif diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/sampler.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/sampler.hpp new file mode 100644 index 00000000000..9ebb76c4085 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/sampler.hpp @@ -0,0 +1,50 @@ +#pragma once +#include "constants.hpp" +#include "stack_renderer.hpp" + +#include + +namespace Datadog { + +class Sampler +{ + // This class manages the initialization of echion as well as the sampling thread. + // The underlying echion instance it manages keeps much of its state globally, so this class is a singleton in order + // to keep it aligned with the echion state. + private: + std::shared_ptr renderer_ptr; + + // The sampling interval is atomic because it needs to be safely propagated to the sampling thread + std::atomic sample_interval_us{ g_default_sampling_period_us }; + + // This is not a running total of the number of launched threads; it is a sequence for the + // transactions upon the sampling threads (usually starts + stops). 
This allows threads to be + // stopped or started in a straightforward manner without finer-grained control (locks) + std::atomic thread_seq_num{ 0 }; + + // Parameters + uint64_t echion_frame_cache_size = g_default_echion_frame_cache_size; + + // Helper function; implementation of the echion sampling thread + void sampling_thread(const uint64_t seq_num); + + // This is a singleton, so no public constructor + Sampler(); + + // One-time setup of echion + void one_time_setup(); + + public: + // Singleton instance + static Sampler& get(); + void start(); + void stop(); + + // The Python side dynamically adjusts the sampling rate based on overhead, so we need to be able to update our own + // intervals accordingly. Rather than a preemptive measure, we assume the rate is ~fairly stable and just update + // the next rate with the latest interval. This is not perfect because the adjustment is based on self-time, and + // we're not currently accounting for the echion self-time. + void set_interval(double new_interval); +}; + +} // namespace Datadog diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp new file mode 100644 index 00000000000..57139f5e481 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "python_headers.hpp" + +#include "dd_wrapper/include/interface.hpp" +#include "echion/render.h" + +namespace Datadog { + +class StackRenderer : public RendererInterface +{ + Sample* sample = nullptr; + + virtual void render_message(std::string_view msg) override; + virtual void render_thread_begin(PyThreadState* tstate, + std::string_view name, + microsecond_t wall_time_us, + uintptr_t thread_id, + unsigned long native_id) override; + + virtual void render_stack_begin() override; + virtual void 
render_python_frame(std::string_view name, std::string_view file, uint64_t line) override; + virtual void render_native_frame(std::string_view name, std::string_view file, uint64_t line) override; + virtual void render_cpu_time(microsecond_t cpu_time_us) override; + virtual void render_stack_end() override; + virtual bool is_valid() override; +}; + +} // namespace Datadog diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/util/cast_to_pyfunc.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/util/cast_to_pyfunc.hpp new file mode 100644 index 00000000000..1ba9ef131a1 --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/util/cast_to_pyfunc.hpp @@ -0,0 +1,11 @@ +#pragma once +#include + +// Abiding by the contract required in the Python interface requires abandoning certain checks in one's main project. +// Rather than abandoning discipline, we can include this header in a way that bypasses these checks. +inline static PyCFunction +cast_to_pycfunction(PyObject* (*func)(PyObject*, PyObject*, PyObject*)) +{ + // Direct C-style cast + return (PyCFunction)(func); +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/vendored/utf8_validate.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/vendored/utf8_validate.hpp new file mode 100644 index 00000000000..1715b7ec5ff --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/include/vendored/utf8_validate.hpp @@ -0,0 +1,34 @@ +#pragma once + +// From https://gist.github.com/ichramm/3ffeaf7ba4f24853e9ecaf176da84566 +inline static bool +utf8_check_is_valid(const char* str, int len) +{ + int n; + for (int i = 0; i < len; ++i) { + unsigned char c = (unsigned char)str[i]; + // if (c==0x09 || c==0x0a || c==0x0d || (0x20 <= c && c <= 0x7e) ) n = 0; // is_printable_ascii + if (0x00 <= c && c <= 0x7f) { + n = 0; // 0bbbbbbb + } else if ((c & 0xE0) == 0xC0) { + n = 1; // 110bbbbb + } else if (c == 0xed && i < (len - 1) && ((unsigned char)str[i + 1] & 0xa0) == 0xa0) { 
+ return false; // U+d800 to U+dfff + } else if ((c & 0xF0) == 0xE0) { + n = 2; // 1110bbbb + } else if ((c & 0xF8) == 0xF0) { + n = 3; // 11110bbb + //} else if (($c & 0xFC) == 0xF8) { n=4; // 111110bb //byte 5, unnecessary in 4 byte UTF-8 + //} else if (($c & 0xFE) == 0xFC) { n=5; // 1111110b //byte 6, unnecessary in 4 byte UTF-8 + } else { + return false; + } + + for (int j = 0; j < n && i < len; ++j) { // n bytes matching 10bbbbbb follow ? + if ((++i == len) || (((unsigned char)str[i] & 0xC0) != 0x80)) { + return false; + } + } + } + return true; +} diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp new file mode 100644 index 00000000000..f32eeeb648e --- /dev/null +++ b/ddtrace/internal/datadog/profiling/stack_v2/src/sampler.cpp @@ -0,0 +1,88 @@ +#include "sampler.hpp" + +#include "echion/interp.h" +#include "echion/tasks.h" +#include "echion/threads.h" + +using namespace Datadog; + +void +Sampler::sampling_thread(const uint64_t seq_num) +{ + using namespace std::chrono; + auto sample_time_prev = steady_clock::now(); + + while (seq_num == thread_seq_num.load()) { + auto sample_time_now = steady_clock::now(); + auto wall_time_us = duration_cast(sample_time_now - sample_time_prev).count(); + sample_time_prev = sample_time_now; + + // Perform the sample + for_each_interp([&](PyInterpreterState* interp) -> void { + for_each_thread(interp, [&](PyThreadState* tstate, ThreadInfo& thread) { + thread.sample(interp->id, tstate, wall_time_us); + }); + }); + + // Before sleeping, check whether the user has called for this thread to die. + if (seq_num != thread_seq_num.load()) { + break; + } + + // Sleep for the remainder of the interval, get it atomically + // Generally speaking system "sleep" times will wait _at least_ as long as the specified time, so + // in actual fact the duration may be more than we indicated. This tends to be more true on busy + // systems. 
+        std::this_thread::sleep_until(sample_time_now + microseconds(sample_interval_us.load()));
+    }
+}
+
+void
+Sampler::set_interval(double new_interval_s)
+{
+    microsecond_t new_interval_us = static_cast<microsecond_t>(new_interval_s * 1e6);
+    sample_interval_us.store(new_interval_us);
+}
+
+Sampler::Sampler()
+  : renderer_ptr{ std::make_shared<StackRenderer>() }
+{}
+
+Sampler&
+Sampler::get()
+{
+    static Sampler instance;
+    return instance;
+}
+
+void
+Sampler::one_time_setup()
+{
+    _set_cpu(true);
+    init_frame_cache(echion_frame_cache_size);
+    _set_pid(getpid());
+
+    // Register our rendering callbacks with echion's Renderer singleton
+    Renderer::get().set_renderer(renderer_ptr);
+}
+
+void
+Sampler::start()
+{
+    static std::once_flag once;
+    std::call_once(once, [this]() { this->one_time_setup(); });
+
+    // Launch the sampling thread.
+    // Thread lifetime is bounded by the value of the sequence number. When it is changed from the value the thread was
+    // launched with, the thread will exit.
+    std::thread t(&Sampler::sampling_thread, this, ++thread_seq_num);
+    t.detach();
+}
+
+void
+Sampler::stop()
+{
+    // Modifying the thread sequence number will cause the sampling thread to exit when it completes
+    // a sampling loop. Currently there is no mechanism to force stuck threads, should they get locked.
+    ++thread_seq_num;
+}
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
new file mode 100644
index 00000000000..8c1e704f38d
--- /dev/null
+++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
@@ -0,0 +1,105 @@
+#include "stack_renderer.hpp"
+#include "utf8_validate.hpp"
+
+using namespace Datadog;
+
+void
+StackRenderer::render_message(std::string_view msg)
+{
+    // This function is part of the necessary API, but it is unused by the Datadog profiler for now.
+    (void)msg;
+}
+
+void
+StackRenderer::render_thread_begin(PyThreadState* tstate,
+                                   std::string_view name,
+                                   microsecond_t wall_time_us,
+                                   uintptr_t thread_id,
+                                   unsigned long native_id)
+{
+    (void)tstate;
+    static bool failed = false;
+    if (failed) {
+        return;
+    }
+    sample = ddup_start_sample();
+    if (sample == nullptr) {
+        std::cerr << "Failed to create a sample. Stack v2 sampler will be disabled." << std::endl;
+        failed = true;
+        return;
+    }
+
+    //#warning stack_v2 should use a C++ interface instead of re-converting intermediates
+    ddup_push_threadinfo(sample, static_cast<int64_t>(thread_id), static_cast<int64_t>(native_id), name);
+    ddup_push_walltime(sample, 1000 * wall_time_us, 1);
+}
+
+void
+StackRenderer::render_stack_begin()
+{
+    // This function is part of the necessary API, but it is unused by the Datadog profiler for now.
+}
+
+void
+StackRenderer::render_python_frame(std::string_view name, std::string_view file, uint64_t line)
+{
+    if (sample == nullptr) {
+        std::cerr << "Received a new frame without sample storage. Some profiling data has been lost." << std::endl;
+        return;
+    }
+
+    // Normally, further utf-8 validation would be pointless here, but we may be reading data where the
+    // string pointer was valid, but the string is actually garbage data at the exact time of the read.
+    // This is rare, but blowing some cycles on early validation allows the sample to be retained by
+    // libdatadog, so we can evaluate the actual impact of this scenario in live scenarios.
+    static const std::string_view invalid = "<invalid>";
+    if (!utf8_check_is_valid(name.data(), name.size())) {
+        name = invalid;
+    }
+    if (!utf8_check_is_valid(file.data(), file.size())) {
+        file = invalid;
+    }
+    ddup_push_frame(sample, name, file, 0, line);
+}
+
+void
+StackRenderer::render_native_frame(std::string_view name, std::string_view file, uint64_t line)
+{
+    // This function is part of the necessary API, but it is unused by the Datadog profiler for now.
+    (void)name;
+    (void)file;
+    (void)line;
+}
+
+void
+StackRenderer::render_cpu_time(microsecond_t cpu_time_us)
+{
+    if (sample == nullptr) {
+        std::cerr << "Received a CPU time without sample storage. Some profiling data has been lost." << std::endl;
+        return;
+    }
+
+    // ddup is configured to expect nanoseconds
+    ddup_push_cputime(sample, 1000 * cpu_time_us, 1);
+}
+
+void
+StackRenderer::render_stack_end()
+{
+    if (sample == nullptr) {
+        std::cerr << "Ending a stack without any context. Some profiling data has been lost." << std::endl;
+        return;
+    }
+
+    ddup_flush_sample(sample);
+    ddup_drop_sample(sample);
+    sample = nullptr;
+}
+
+bool
+StackRenderer::is_valid()
+{
+    // In general, echion may need to check whether the extension has invalid state before calling into it,
+    // but in this case it doesn't matter
+    return true;
+}
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp
new file mode 100644
index 00000000000..dd816836a17
--- /dev/null
+++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_v2.cpp
@@ -0,0 +1,74 @@
+#include "cast_to_pyfunc.hpp"
+#include "python_headers.hpp"
+#include "sampler.hpp"
+
+#include <iostream>
+
+using namespace Datadog;
+
+static PyObject*
+_stack_v2_start(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+    (void)self;
+    std::cout << "Starting the sampler" << std::endl;
+    static const char* const_kwlist[] = { "min_interval", NULL };
+    static char** kwlist = const_cast<char**>(const_kwlist);
+    double min_interval_s = g_default_sampling_period_s;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|d", kwlist, &min_interval_s)) {
+        return NULL; // If an error occurs during argument parsing
+    }
+
+    Sampler::get().set_interval(min_interval_s);
+    Sampler::get().start();
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+// Bypasses the old-style cast warning with an unchecked helper function
+PyCFunction stack_v2_start = cast_to_pycfunction(_stack_v2_start);
+
+static PyObject*
+stack_v2_stop(PyObject* self, PyObject* args)
+{
+    (void)self;
+    (void)args;
+    Sampler::get().stop();
+    Py_RETURN_NONE;
+}
+
+static PyObject*
+stack_v2_set_interval(PyObject* self, PyObject* args)
+{
+    // Assumes the interval is given in fractional seconds
+    (void)self;
+    double new_interval;
+    if (!PyArg_ParseTuple(args, "d", &new_interval)) {
+        return NULL; // If an error occurs during argument parsing
+    }
+    Sampler::get().set_interval(new_interval);
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef _stack_v2_methods[] = {
+    { "start", reinterpret_cast<PyCFunction>(stack_v2_start), METH_VARARGS | METH_KEYWORDS, "Start the sampler" },
+    { "stop", stack_v2_stop, METH_VARARGS, "Stop the sampler" },
+    { "set_interval", stack_v2_set_interval, METH_VARARGS, "Set the sampling interval" },
+    { NULL, NULL, 0, NULL }
+};
+
+PyMODINIT_FUNC
+PyInit__stack_v2(void)
+{
+    PyObject* m;
+    static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT, "_stack_v2", NULL, -1, _stack_v2_methods, NULL, NULL, NULL, NULL
+    };
+
+    m = PyModule_Create(&moduledef);
+    if (!m)
+        return NULL;
+
+    return m;
+}
diff --git a/ddtrace/internal/telemetry/constants.py b/ddtrace/internal/telemetry/constants.py
index 8fa2c41dd48..d8f86e0392b 100644
--- a/ddtrace/internal/telemetry/constants.py
+++ b/ddtrace/internal/telemetry/constants.py
@@ -62,7 +62,6 @@
 TELEMETRY_PROFILING_MEMORY_ENABLED = "DD_PROFILING_MEMORY_ENABLED"
 TELEMETRY_PROFILING_HEAP_ENABLED = "DD_PROFILING_HEAP_ENABLED"
 TELEMETRY_PROFILING_LOCK_ENABLED = "DD_PROFILING_LOCK_ENABLED"
-TELEMETRY_PROFILING_EXPORT_PY_ENABLED = "DD_PROFILING_EXPORT_PY_ENABLED"
 TELEMETRY_PROFILING_EXPORT_LIBDD_ENABLED = "DD_PROFILING_EXPORT_LIBDD_ENABLED"
 TELEMETRY_PROFILING_CAPTURE_PCT = "DD_PROFILING_CAPTURE_PCT"
 TELEMETRY_PROFILING_UPLOAD_INTERVAL = "DD_PROFILING_UPLOAD_INTERVAL"
diff --git a/ddtrace/internal/telemetry/writer.py b/ddtrace/internal/telemetry/writer.py
index f8ef9ff0830..2582c08e83c 100644
--- a/ddtrace/internal/telemetry/writer.py
+++
b/ddtrace/internal/telemetry/writer.py @@ -59,7 +59,6 @@ from .constants import TELEMETRY_PRIORITY_SAMPLING from .constants import TELEMETRY_PROFILING_CAPTURE_PCT from .constants import TELEMETRY_PROFILING_EXPORT_LIBDD_ENABLED -from .constants import TELEMETRY_PROFILING_EXPORT_PY_ENABLED from .constants import TELEMETRY_PROFILING_HEAP_ENABLED from .constants import TELEMETRY_PROFILING_LOCK_ENABLED from .constants import TELEMETRY_PROFILING_MAX_FRAMES @@ -449,7 +448,6 @@ def _app_started_event(self, register_app_shutdown=True): (TELEMETRY_PROFILING_MEMORY_ENABLED, prof_config.memory.enabled, "unknown"), (TELEMETRY_PROFILING_HEAP_ENABLED, prof_config.heap.sample_size > 0, "unknown"), (TELEMETRY_PROFILING_LOCK_ENABLED, prof_config.lock.enabled, "unknown"), - (TELEMETRY_PROFILING_EXPORT_PY_ENABLED, prof_config.export.py_enabled, "unknown"), (TELEMETRY_PROFILING_EXPORT_LIBDD_ENABLED, prof_config.export.libdd_enabled, "unknown"), (TELEMETRY_PROFILING_CAPTURE_PCT, prof_config.capture_pct, "unknown"), (TELEMETRY_PROFILING_MAX_FRAMES, prof_config.max_frames, "unknown"), diff --git a/ddtrace/profiling/collector/memalloc.py b/ddtrace/profiling/collector/memalloc.py index d8ab54e9d38..06dd16463c0 100644 --- a/ddtrace/profiling/collector/memalloc.py +++ b/ddtrace/profiling/collector/memalloc.py @@ -64,7 +64,6 @@ class MemoryCollector(collector.PeriodicCollector): heap_sample_size = attr.ib(type=int, default=config.heap.sample_size) ignore_profiler = attr.ib(default=config.ignore_profiler, type=bool) _export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled) - _export_py_enabled = attr.ib(type=bool, default=config.export.py_enabled) def _start_service(self): # type: (...) -> None @@ -128,8 +127,8 @@ def snapshot(self): # DEV: This might happen if the memalloc sofile is unlinked and relinked without module # re-initialization. 
LOG.debug("Invalid state detected in memalloc module, suppressing profile") - - if self._export_py_enabled: + return tuple() + else: return ( tuple( MemoryHeapSampleEvent( @@ -145,8 +144,6 @@ def snapshot(self): if not self.ignore_profiler or thread_id not in thread_id_ignore_set ), ) - else: - return tuple() def collect(self): # TODO: The event timestamp is slightly off since it's going to be the time we copy the data from the @@ -182,8 +179,8 @@ def collect(self): # DEV: This might happen if the memalloc sofile is unlinked and relinked without module # re-initialization. LOG.debug("Invalid state detected in memalloc module, suppressing profile") - - if self._export_py_enabled: + return tuple() + else: return ( tuple( MemoryAllocSampleEvent( @@ -200,5 +197,3 @@ def collect(self): if not self.ignore_profiler or thread_id not in thread_id_ignore_set ), ) - else: - return [] diff --git a/ddtrace/profiling/collector/stack.pyx b/ddtrace/profiling/collector/stack.pyx index 31e0deb4375..c9d15bd124d 100644 --- a/ddtrace/profiling/collector/stack.pyx +++ b/ddtrace/profiling/collector/stack.pyx @@ -13,6 +13,7 @@ from ddtrace import context from ddtrace import span as ddspan from ddtrace.internal import compat from ddtrace.internal.datadog.profiling import ddup +from ddtrace.internal.datadog.profiling import stack_v2 from ddtrace.internal.utils import formats from ddtrace.profiling import _threading from ddtrace.profiling import collector @@ -32,19 +33,14 @@ FEATURES = { "transparent_events": False, } -# These are flags indicating the enablement of the profiler. This is handled at the level of -# a global rather than a passed parameter because this is a time of transition +# Switch to use libdd (ddup) collector/exporter. Handled as a global since we're transitioning into +# the now-experimental interface. 
cdef bint use_libdd = False -cdef bint use_py = True cdef void set_use_libdd(bint flag): global use_libdd use_libdd = flag -cdef void set_use_py(bint flag): - global use_py - use_py = flag - IF UNAME_SYSNAME == "Linux": FEATURES['cpu-time'] = True @@ -327,89 +323,89 @@ cdef stack_collect(ignore_profiler, thread_time, max_nframes, interval, wall_tim frames, nframes = _traceback.pyframe_to_frames(task_pyframes, max_nframes) - if use_libdd and nframes: - handle = ddup.SampleHandle() - handle.push_walltime(wall_time, 1) - handle.push_threadinfo(thread_id, thread_native_id, thread_name) - handle.push_task_id(task_id) - handle.push_task_name(task_name) - handle.push_class_name(frames[0].class_name) - for frame in frames: - handle.push_frame(frame.function_name, frame.file_name, 0, frame.lineno) - handle.flush_sample() - - if use_py and nframes: - stack_events.append( - stack_event.StackSampleEvent( - thread_id=thread_id, - thread_native_id=thread_native_id, - thread_name=thread_name, - task_id=task_id, - task_name=task_name, - nframes=nframes, frames=frames, - wall_time_ns=wall_time, - sampling_period=int(interval * 1e9), + if nframes: + if use_libdd: + handle = ddup.SampleHandle() + handle.push_walltime(wall_time, 1) + handle.push_threadinfo(thread_id, thread_native_id, thread_name) + handle.push_task_id(task_id) + handle.push_task_name(task_name) + handle.push_class_name(frames[0].class_name) + for frame in frames: + handle.push_frame(frame.function_name, frame.file_name, 0, frame.lineno) + handle.flush_sample() + else: + stack_events.append( + stack_event.StackSampleEvent( + thread_id=thread_id, + thread_native_id=thread_native_id, + thread_name=thread_name, + task_id=task_id, + task_name=task_name, + nframes=nframes, frames=frames, + wall_time_ns=wall_time, + sampling_period=int(interval * 1e9), + ) ) - ) frames, nframes = _traceback.pyframe_to_frames(thread_pyframes, max_nframes) - if use_libdd and nframes: - handle = ddup.SampleHandle() - handle.push_cputime( 
cpu_time, 1) - handle.push_walltime( wall_time, 1) - handle.push_threadinfo(thread_id, thread_native_id, thread_name) - handle.push_class_name(frames[0].class_name) - for frame in frames: - handle.push_frame(frame.function_name, frame.file_name, 0, frame.lineno) - handle.push_span(span, collect_endpoint) - handle.flush_sample() - - if use_py and nframes: - event = stack_event.StackSampleEvent( - thread_id=thread_id, - thread_native_id=thread_native_id, - thread_name=thread_name, - task_id=None, - task_name=None, - nframes=nframes, - frames=frames, - wall_time_ns=wall_time, - cpu_time_ns=cpu_time, - sampling_period=int(interval * 1e9), - ) - event.set_trace_info(span, collect_endpoint) - stack_events.append(event) - - if exception is not None: - exc_type, exc_traceback = exception - - frames, nframes = _traceback.traceback_to_frames(exc_traceback, max_nframes) - - if use_libdd and nframes: + if nframes: + if use_libdd: handle = ddup.SampleHandle() + handle.push_cputime( cpu_time, 1) + handle.push_walltime( wall_time, 1) handle.push_threadinfo(thread_id, thread_native_id, thread_name) - handle.push_exceptioninfo(exc_type, 1) handle.push_class_name(frames[0].class_name) for frame in frames: handle.push_frame(frame.function_name, frame.file_name, 0, frame.lineno) handle.push_span(span, collect_endpoint) handle.flush_sample() - - if use_py and nframes: - exc_event = stack_event.StackExceptionSampleEvent( + else: + event = stack_event.StackSampleEvent( thread_id=thread_id, - thread_name=thread_name, thread_native_id=thread_native_id, + thread_name=thread_name, task_id=None, task_name=None, nframes=nframes, frames=frames, + wall_time_ns=wall_time, + cpu_time_ns=cpu_time, sampling_period=int(interval * 1e9), - exc_type=exc_type, ) - exc_event.set_trace_info(span, collect_endpoint) - exc_events.append(exc_event) + event.set_trace_info(span, collect_endpoint) + stack_events.append(event) + + if exception is not None: + exc_type, exc_traceback = exception + + frames, nframes 
= _traceback.traceback_to_frames(exc_traceback, max_nframes) + + if nframes: + if use_libdd: + handle = ddup.SampleHandle() + handle.push_threadinfo(thread_id, thread_native_id, thread_name) + handle.push_exceptioninfo(exc_type, 1) + handle.push_class_name(frames[0].class_name) + for frame in frames: + handle.push_frame(frame.function_name, frame.file_name, 0, frame.lineno) + handle.push_span(span, collect_endpoint) + handle.flush_sample() + else: + exc_event = stack_event.StackExceptionSampleEvent( + thread_id=thread_id, + thread_name=thread_name, + thread_native_id=thread_native_id, + task_id=None, + task_name=None, + nframes=nframes, + frames=frames, + sampling_period=int(interval * 1e9), + exc_type=exc_type, + ) + exc_event.set_trace_info(span, collect_endpoint) + exc_events.append(exc_event) return stack_events, exc_events @@ -474,6 +470,7 @@ class StackCollector(collector.PeriodicCollector): _thread_time = attr.ib(init=False, repr=False, eq=False) _last_wall_time = attr.ib(init=False, repr=False, eq=False, type=int) _thread_span_links = attr.ib(default=None, init=False, repr=False, eq=False) + _stack_collector_v2_enabled = attr.ib(type=bool, default=config.stack.v2.enabled) @max_time_usage_pct.validator def _check_max_time_usage(self, attribute, value): @@ -487,8 +484,30 @@ class StackCollector(collector.PeriodicCollector): if self.tracer is not None: self._thread_span_links = _ThreadSpanLinks() self.tracer.context_provider._on_activate(self._thread_span_links.link_span) - set_use_libdd(config.export.libdd_enabled) - set_use_py(config.export.py_enabled) + + # If libdd is enabled, propagate the configuration + if config.export.libdd_enabled: + if not ddup.is_available: + # We probably already told the user about this in profiler.py, but let's do it again here. 
+ LOG.error("Failed to load the libdd collector from stack.pyx; falling back to the legacy collector") + set_use_libdd(False) + else: + set_use_libdd(True) + + # If stack v2 is requested, verify it is loaded properly and that libdd has been enabled. + if self._stack_collector_v2_enabled: + if not stack_v2.is_available: + self._stack_collector_v2_enabled = False + LOG.error("Failed to load the v2 stack sampler; falling back to the v1 stack sampler") + if not use_libdd: + self._stack_collector_v2_enabled = False + LOG.error("libdd collector not enabled; falling back to the v1 stack sampler. Did you set DD_PROFILING_EXPORT_LIBDD_ENABLED=true?") + + # If at the end of things, stack v2 is still enabled, then start the native thread running the v2 sampler + if self._stack_collector_v2_enabled: + LOG.debug("Starting the stack v2 sampler") + stack_v2.start() + def _start_service(self): # type: (...) -> None @@ -506,6 +525,10 @@ class StackCollector(collector.PeriodicCollector): self.tracer.context_provider._deregister_on_activate(self._thread_span_links.link_span) LOG.debug("Profiling StackCollector stopped") + # Also tell the native thread running the v2 sampler to stop, if needed + if self._stack_collector_v2_enabled: + stack_v2.stop() + def _compute_new_interval(self, used_wall_time_ns): interval = (used_wall_time_ns / (self.max_time_usage_pct / 100.0)) - used_wall_time_ns return max(interval / 1e9, self.min_interval_time) @@ -515,18 +538,24 @@ class StackCollector(collector.PeriodicCollector): now = compat.monotonic_ns() wall_time = now - self._last_wall_time self._last_wall_time = now - - all_events = stack_collect( - self.ignore_profiler, - self._thread_time, - self.nframes, - self.interval, - wall_time, - self._thread_span_links, - self.endpoint_collection_enabled, - ) + all_events = [] + + # If the stack v2 collector is enabled, then do not collect the stack samples here. 
+ if not self._stack_collector_v2_enabled: + all_events = stack_collect( + self.ignore_profiler, + self._thread_time, + self.nframes, + self.interval, + wall_time, + self._thread_span_links, + self.endpoint_collection_enabled, + ) used_wall_time_ns = compat.monotonic_ns() - now self.interval = self._compute_new_interval(used_wall_time_ns) + if self._stack_collector_v2_enabled: + stack_v2.set_interval(self.interval) + return all_events diff --git a/ddtrace/profiling/exporter/pprof.pyx b/ddtrace/profiling/exporter/pprof.pyx index 68061a38608..79f40685fef 100644 --- a/ddtrace/profiling/exporter/pprof.pyx +++ b/ddtrace/profiling/exporter/pprof.pyx @@ -12,7 +12,7 @@ from ddtrace import ext from ddtrace.internal import packages from ddtrace.internal._encoding import ListStringTable as _StringTable from ddtrace.internal.compat import ensure_text -from ddtrace.internal.datadog.profiling.utils import sanitize_string +from ddtrace.internal.datadog.profiling.ddup.utils import sanitize_string from ddtrace.internal.utils import config from ddtrace.profiling import event from ddtrace.profiling import exporter diff --git a/ddtrace/profiling/profiler.py b/ddtrace/profiling/profiler.py index 0b5d9b87f40..e7382106f34 100644 --- a/ddtrace/profiling/profiler.py +++ b/ddtrace/profiling/profiler.py @@ -128,7 +128,6 @@ class _ProfilerInstance(service.Service): init=False, factory=lambda: os.environ.get("AWS_LAMBDA_FUNCTION_NAME"), type=Optional[str] ) _export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled) - _export_py_enabled = attr.ib(type=bool, default=config.export.py_enabled) ENDPOINT_TEMPLATE = "https://intake.profile.{}" @@ -169,19 +168,31 @@ def _build_default_exporters(self): if self._lambda_function_name is not None: self.tags.update({"functionname": self._lambda_function_name}) + # It's possible to fail to load the libdatadog collector, so we check. Any other consumers + # of libdd can do their own check, so we just log. 
+ if self._export_libdd_enabled and not ddup.is_available: + LOG.error("Failed to load the libdd collector, falling back to the legacy collector") + self._export_libdd_enabled = False + elif self._export_libdd_enabled: + LOG.debug("Using the libdd collector") + # Build the list of enabled Profiling features and send along as a tag configured_features = [] if self._stack_collector_enabled: - configured_features.append("stack") + if config.stack.v2.enabled: + configured_features.append("stack_v2") + else: + configured_features.append("stack") if self._lock_collector_enabled: configured_features.append("lock") if self._memory_collector_enabled: configured_features.append("mem") if config.heap.sample_size > 0: configured_features.append("heap") + if self._export_libdd_enabled: configured_features.append("exp_dd") - if self._export_py_enabled: + else: configured_features.append("exp_py") configured_features.append("CAP" + str(config.capture_pct)) configured_features.append("MAXF" + str(config.max_frames)) @@ -192,21 +203,15 @@ def _build_default_exporters(self): endpoint_call_counter_span_processor.enable() if self._export_libdd_enabled: - versionname = ( - "{}.libdd".format(self.version) - if self._export_py_enabled and self.version is not None - else self.version - ) ddup.init( env=self.env, service=self.service, - version=versionname, + version=self.version, tags=self.tags, max_nframes=config.max_frames, url=endpoint, ) - - if self._export_py_enabled: + else: # DEV: Import this only if needed to avoid importing protobuf # unnecessarily from ddtrace.profiling.exporter import http @@ -380,4 +385,4 @@ def _stop_service(self, flush=True, join=True): col.join() def visible_events(self): - return self._export_py_enabled + return not self._export_libdd_enabled diff --git a/ddtrace/profiling/scheduler.py b/ddtrace/profiling/scheduler.py index b47c73a065e..d2fa9e76363 100644 --- a/ddtrace/profiling/scheduler.py +++ b/ddtrace/profiling/scheduler.py @@ -25,7 +25,6 @@ class 
Scheduler(periodic.PeriodicService): _configured_interval = attr.ib(init=False) _last_export = attr.ib(init=False, default=None, eq=False) _export_libdd_enabled = attr.ib(type=bool, default=config.export.libdd_enabled) - _export_py_enabled = attr.ib(type=bool, default=config.export.py_enabled) def __attrs_post_init__(self): # Copy the value to use it later since we're going to adjust the real interval @@ -45,9 +44,8 @@ def flush(self): if self._export_libdd_enabled: ddup.upload() - if not self._export_py_enabled: - # If we're not using the Python profiler, then stop now - # But set these fields for compatibility + # These are only used by the Python uploader, but set them here to keep logs/etc + # consistent for now start = self._last_export self._last_export = compat.time_ns() return diff --git a/ddtrace/settings/profiling.py b/ddtrace/settings/profiling.py index f9829d8389d..a84549ed71c 100644 --- a/ddtrace/settings/profiling.py +++ b/ddtrace/settings/profiling.py @@ -1,5 +1,4 @@ import math -import platform import typing as t from envier import En @@ -38,11 +37,6 @@ def _derive_default_heap_sample_size(heap_config, default_heap_sample_size=1024 return int(max(math.ceil(total_mem / max_samples), default_heap_sample_size)) -def _is_valid_libdatadog(): - # type: () -> bool - return platform.machine() in ["x86_64", "aarch64"] and "glibc" in platform.libc_ver()[0] - - class ProfilingConfig(En): __prefix__ = "dd.profiling" @@ -165,6 +159,17 @@ class Stack(En): help="Whether to enable the stack profiler", ) + class V2(En): + __item__ = __prefix__ = "v2" + + enabled = En.v( + bool, + "enabled", + default=False, + help_type="Boolean", + help="Whether to enable the v2 stack profiler. 
Also enables the libdatadog collector.", + ) + class Lock(En): __item__ = __prefix__ = "lock" @@ -218,7 +223,7 @@ class Heap(En): class Export(En): __item__ = __prefix__ = "export" - _libdd_enabled = En.v( + libdd_enabled = En.v( bool, "libdd_enabled", default=False, @@ -226,16 +231,5 @@ class Export(En): help="Enables collection and export using the experimental exporter", ) - # For now, only allow libdd to be enabled if the user asks for it - libdd_enabled = En.d(bool, lambda c: c._libdd_enabled and _is_valid_libdatadog()) - - py_enabled = En.v( - bool, - "py_enabled", - default=True, - help_type="Boolean", - help="Enables collection and export using the classic Python exporter", - ) - config = ProfilingConfig() diff --git a/pyproject.toml b/pyproject.toml index d753729da75..c8235fa7cf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ exclude = ''' | ddtrace/profiling/collector/stack.pyx$ | ddtrace/profiling/exporter/pprof_.*_pb2.py$ | ddtrace/profiling/exporter/pprof.pyx$ - | ddtrace/internal/datadog/profiling/_ddup.pyx$ + | ddtrace/internal/datadog/profiling/ddup/_ddup.pyx$ | ddtrace/vendor/ | ddtrace/appsec/_iast/_taint_tracking/_vendor/ | ddtrace/_version.py @@ -150,6 +150,9 @@ exclude-modules = ''' | ddtrace.internal.datastreams.kafka # libdd_wrapper is a common native dependency, not a module | ddtrace.internal.datadog.profiling.libdd_wrapper + # _ddup and _stack_v2 miss a runtime dependency in slotscheck, but ddup and stack_v2 are fine + | ddtrace.internal.datadog.profiling.ddup._ddup + | ddtrace.internal.datadog.profiling.stack_v2._stack_v2 ) ''' diff --git a/releasenotes/notes/profiling-add-stack-v2-975ebe845cdadc8f.yaml b/releasenotes/notes/profiling-add-stack-v2-975ebe845cdadc8f.yaml new file mode 100644 index 00000000000..a6ebdbd0b62 --- /dev/null +++ b/releasenotes/notes/profiling-add-stack-v2-975ebe845cdadc8f.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + profiling: implement an experimental stack sampling feature, which can be 
enabled by setting + ``DD_PROFILING_STACK_V2_ENABLED=true``. This new sampler should resolve segfault issues on Python + 3.11 and later, while also decreasing the latency contribution of the profiler in many + situations, and also improving the accuracy of stack-sampling data. This feature is currently + only available on Linux using CPython 3.8 or greater. Requires + ``DD_PROFILING_EXPORT_LIBDD_ENABLED=true`` to be set. diff --git a/setup.py b/setup.py index e85443160ae..5d10e4834dc 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,8 @@ LIBDDWAF_DOWNLOAD_DIR = HERE / "ddtrace" / "appsec" / "_ddwaf" / "libddwaf" IAST_DIR = HERE / "ddtrace" / "appsec" / "_iast" / "_taint_tracking" -DDUP_DIR = HERE / "ddtrace" / "internal" / "datadog" / "profiling" +DDUP_DIR = HERE / "ddtrace" / "internal" / "datadog" / "profiling" / "ddup" +STACK_V2_DIR = HERE / "ddtrace" / "internal" / "datadog" / "profiling" / "stack_v2" CURRENT_OS = platform.system() @@ -302,7 +303,6 @@ def build_extension_cmake(self, ext): "-DPython3_LIBRARIES={}".format(python_lib), "-DPYTHON_EXECUTABLE={}".format(sys.executable), "-DCMAKE_BUILD_TYPE={}".format(ext.build_type), - "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(output_dir), "-DLIB_INSTALL_DIR={}".format(output_dir), "-DEXTENSION_NAME={}".format(extension_basename), ] @@ -314,7 +314,7 @@ def build_extension_cmake(self, ext): # CMAKE_BUILD_PARALLEL_LEVEL works across all generators # self.parallel is a Python 3 only way to set parallel jobs by hand # using -j in the build_ext call, not supported by pip or PyPA-build. - # DEV: -j is only supported in CMake 3.12+ only. + # DEV: -j is supported in CMake 3.12+ only. 
if hasattr(self, "parallel") and self.parallel: build_args += ["-j{}".format(self.parallel)] @@ -353,7 +353,7 @@ def __init__( build_args=[], install_args=[], build_type=None, - optional=False, + optional=True, # By default, extensions are optional ): super().__init__(name, sources=[]) self.source_dir = source_dir @@ -405,7 +405,6 @@ def get_exts_for(name): encoding_macros = [("__LITTLE_ENDIAN__", "1")] -# TODO can we specify the exact compiler version less literally? if CURRENT_OS == "Windows": encoding_libraries = ["ws2_32"] extra_compile_args = [] @@ -454,25 +453,30 @@ def get_exts_for(name): ) ) - ext_modules.append( - CMakeExtension("ddtrace.appsec._iast._taint_tracking._native", source_dir=IAST_DIR, optional=True) - ) + ext_modules.append(CMakeExtension("ddtrace.appsec._iast._taint_tracking._native", source_dir=IAST_DIR)) if platform.system() == "Linux" and is_64_bit_python(): ext_modules.append( CMakeExtension( - "ddtrace.internal.datadog.profiling._ddup", + "ddtrace.internal.datadog.profiling.ddup._ddup", source_dir=DDUP_DIR, - optional=True, cmake_args=[ "-DPY_MAJOR_VERSION={}".format(sys.version_info.major), "-DPY_MINOR_VERSION={}".format(sys.version_info.minor), "-DPY_MICRO_VERSION={}".format(sys.version_info.micro), - "-Ddd_wrapper_INSTALL_DIR={}".format(DDUP_DIR), ], ) ) + # Echion doesn't build on 3.7, so just skip it outright for now + if sys.version_info >= (3, 8): + ext_modules.append( + CMakeExtension( + "ddtrace.internal.datadog.profiling.stack_v2._stack_v2", + source_dir=STACK_V2_DIR, + ) + ) + else: ext_modules = [] @@ -507,7 +511,7 @@ def get_exts_for(name): package_data={ "ddtrace": ["py.typed"], "ddtrace.appsec": ["rules.json"], - "ddtrace.appsec._ddwaf": [str(Path("libddwaf") / "*" / "lib" / "libddwaf.*")], + "ddtrace.appsec._ddwaf": ["libddwaf/*/lib/libddwaf.*"], "ddtrace.appsec._iast._taint_tracking": ["CMakeLists.txt"], "ddtrace.internal.datadog.profiling": ["libdd_wrapper.*"], }, diff --git a/tests/profiling/collector/test_stack.py 
b/tests/profiling/collector/test_stack.py index 681c34d95d3..2e6e577b912 100644 --- a/tests/profiling/collector/test_stack.py +++ b/tests/profiling/collector/test_stack.py @@ -322,7 +322,8 @@ def test_repr(): stack.StackCollector, "StackCollector(status=, " "recorder=Recorder(default_max_events=16384, max_events={}), min_interval_time=0.01, max_time_usage_pct=1.0, " - "nframes=64, ignore_profiler=False, endpoint_collection_enabled=None, tracer=None)", + "nframes=64, ignore_profiler=False, endpoint_collection_enabled=None, tracer=None, " + "_stack_collector_v2_enabled=False)", ) diff --git a/tests/telemetry/test_writer.py b/tests/telemetry/test_writer.py index e8697e4c164..fcfcf6d5b4f 100644 --- a/tests/telemetry/test_writer.py +++ b/tests/telemetry/test_writer.py @@ -90,7 +90,6 @@ def test_app_started_event(telemetry_writer, test_agent_session, mock_time): {"name": "DD_PROFILING_MEMORY_ENABLED", "origin": "unknown", "value": True}, {"name": "DD_PROFILING_HEAP_ENABLED", "origin": "unknown", "value": True}, {"name": "DD_PROFILING_LOCK_ENABLED", "origin": "unknown", "value": True}, - {"name": "DD_PROFILING_EXPORT_PY_ENABLED", "origin": "unknown", "value": True}, {"name": "DD_PROFILING_EXPORT_LIBDD_ENABLED", "origin": "unknown", "value": False}, {"name": "DD_PROFILING_CAPTURE_PCT", "origin": "unknown", "value": 1.0}, {"name": "DD_PROFILING_UPLOAD_INTERVAL", "origin": "unknown", "value": 60.0}, @@ -204,7 +203,6 @@ def test_app_started_event_configuration_override(test_agent_session, run_python env["DD_PROFILING_MEMORY_ENABLED"] = "False" env["DD_PROFILING_HEAP_ENABLED"] = "False" env["DD_PROFILING_LOCK_ENABLED"] = "False" - env["DD_PROFILING_EXPORT_PY_ENABLED"] = "False" env["DD_PROFILING_EXPORT_LIBDD_ENABLED"] = "True" env["DD_PROFILING_CAPTURE_PCT"] = "5.0" env["DD_PROFILING_UPLOAD_INTERVAL"] = "10.0" @@ -249,7 +247,6 @@ def test_app_started_event_configuration_override(test_agent_session, run_python {"name": "DD_PROFILING_MEMORY_ENABLED", "origin": "unknown", 
"value": False}, {"name": "DD_PROFILING_HEAP_ENABLED", "origin": "unknown", "value": False}, {"name": "DD_PROFILING_LOCK_ENABLED", "origin": "unknown", "value": False}, - {"name": "DD_PROFILING_EXPORT_PY_ENABLED", "origin": "unknown", "value": False}, {"name": "DD_PROFILING_EXPORT_LIBDD_ENABLED", "origin": "unknown", "value": True}, {"name": "DD_PROFILING_CAPTURE_PCT", "origin": "unknown", "value": 5.0}, {"name": "DD_PROFILING_UPLOAD_INTERVAL", "origin": "unknown", "value": 10.0},