diff --git a/.circleci/config.templ.yml b/.circleci/config.templ.yml index fb49b013331..a02d01e8665 100644 --- a/.circleci/config.templ.yml +++ b/.circleci/config.templ.yml @@ -495,6 +495,7 @@ jobs: appsec_iast_packages: <<: *machine_executor + parallelism: 10 steps: - run_test: pattern: 'appsec_iast_packages' diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a4285b92563..2d5d56e6c49 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -76,6 +76,7 @@ tests/contrib/django/django_app/appsec_urls.py @DataDog/asm-python tests/contrib/django/test_django_appsec.py @DataDog/asm-python tests/snapshots/tests*appsec*.json @DataDog/asm-python tests/contrib/*/test*appsec*.py @DataDog/asm-python +scripts/iast/* @DataDog/asm-python # Profiling ddtrace/profiling @DataDog/profiling-python @DataDog/apm-core-python diff --git a/benchmarks/bm/iast_utils/ast_patching.py b/benchmarks/bm/iast_utils/ast_patching.py new file mode 100644 index 00000000000..dc402e102b2 --- /dev/null +++ b/benchmarks/bm/iast_utils/ast_patching.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import os +import shutil + + +PROJECT_NAME = "my_project" +NUM_MODULES = 300 # Number of modules to create + + +# Template for a Python module generator +def module_template(module_number, import_modules): + imports_block = "" + if import_modules: + imports_block = "\n".join([f"from {import_module} import *" for import_module in import_modules]) + + body_block = f""" +import os + +def func_{module_number}(): + print('This is function func_{module_number} from module_{module_number}.py') + +class Class{module_number}: + def __init__(self): + print('This is Class{module_number} from module_{module_number}.py') + + def slice_test(self, arg): + return arg[1:3] + + def index_access(self, arg): + return arg[1] + + def os_path_join(self, arg1, arg2): + return os.path.join(arg1, arg2) + + def string_concat(self, arg1, arg2): + return arg1 + arg2 + + def string_fstring(self, arg1, arg2): + return f'{{arg1}} 
{{arg2}}' + + def string_format(self, arg1, arg2): + return '{{0}} {{1}}'.format(arg1, arg2) + + def string_format_modulo(self, arg1, arg2): + return '%s %s' % (arg1, arg2) + + def string_join(self, arg1, arg2): + return ''.join([arg1, arg2]) + + def string_decode(self, arg): + return arg.decode() + + def string_encode(self, arg): + return arg.encode("utf-8") + + def string_replace(self, arg, old, new): + return arg.replace(old, new) + + def bytearray_extend(self, arg1, arg2): + return arg1.extend(arg2) + + def string_upper(self, arg): + return arg.upper() + + def string_lower(self, arg): + return arg.lower() + + def string_swapcase(self, arg): + return arg.swapcase() + + def string_title(self, arg): + return arg.title() + + def string_capitalize(self, arg): + return arg.capitalize() + + def string_casefold(self, arg): + return arg.casefold() + + def string_translate(self, arg, table): + return arg.translate(table) + + def string_zfill(self, arg, width): + return arg.zfill(width) + + def string_ljust(self, arg, width): + return arg.ljust(width) + + def str_call(self, arg): + return str(arg) + + def bytes_call(self, arg): + return bytes(arg) + + def bytearray_call(self, arg): + return bytearray(arg) + +if __name__ == "__main__": + print('This is module_{module_number}.py') +""" + return f"{imports_block}\n{body_block}" + + +def create_project_structure(): + project_name = PROJECT_NAME + num_modules = NUM_MODULES + + # Create the project directory + os.makedirs(project_name, exist_ok=True) + + # Create the __init__.py file to make the directory a package + with open(os.path.join(project_name, "__init__.py"), "w") as f: + f.write(f"# This is the __init__.py file for the {project_name} package\n") + + # last file path + module_path = "" + # Create the modules + for i in range(1, num_modules + 1): + module_name = f"module_{i}.py" + module_path = os.path.join(project_name, module_name) + + # Import all the previous modules in the last module only + if i == num_modules: + 
import_modules = [f"module_{j}" for j in range(1, i)] + else: + import_modules = None + + # Render the template with context + rendered_content = module_template(i, import_modules) + + with open(module_path, "w") as f: + f.write(rendered_content) + + return module_path + + +def destroy_project_structure(): + project_name = PROJECT_NAME + # Remove the project directory + shutil.rmtree(project_name) diff --git a/benchmarks/iast_ast_patching/config.yaml b/benchmarks/iast_ast_patching/config.yaml new file mode 100644 index 00000000000..659868e4bd7 --- /dev/null +++ b/benchmarks/iast_ast_patching/config.yaml @@ -0,0 +1,5 @@ +no_iast: &base_variant + iast_enabled: 0 + +iast_enabled: &iast_enabled + iast_enabled: 1 diff --git a/benchmarks/iast_ast_patching/scenario.py b/benchmarks/iast_ast_patching/scenario.py new file mode 100644 index 00000000000..4a61dcbf392 --- /dev/null +++ b/benchmarks/iast_ast_patching/scenario.py @@ -0,0 +1,29 @@ +import os +import subprocess +import sys + +import bm +from bm.iast_utils.ast_patching import create_project_structure +from bm.iast_utils.ast_patching import destroy_project_structure + + +class IAST_AST_Patching(bm.Scenario): + iast_enabled = bm.var_bool() + + def run(self): + try: + python_file_path = create_project_structure() + + env = os.environ.copy() + env["DD_IAST_ENABLED"] = str(self.iast_enabled) + + subp_cmd = ["ddtrace-run", sys.executable, python_file_path] + + def _(loops): + for _ in range(loops): + subprocess.check_output(subp_cmd, env=env) + + yield _ + + finally: + destroy_project_structure() diff --git a/ddtrace/appsec/_remoteconfiguration.py b/ddtrace/appsec/_remoteconfiguration.py index f3db74e4ac0..7fc08cbd4a8 100644 --- a/ddtrace/appsec/_remoteconfiguration.py +++ b/ddtrace/appsec/_remoteconfiguration.py @@ -121,6 +121,7 @@ def _appsec_rules_data(features: Mapping[str, Any], test_tracer: Optional[Tracer _add_rules_to_list(features, "rules_override", "rules override", ruleset) _add_rules_to_list(features, 
"scanners", "scanners", ruleset) _add_rules_to_list(features, "processors", "processors", ruleset) + _add_rules_to_list(features, "actions", "actions", ruleset) if ruleset: return tracer._appsec_processor._update_rules({k: v for k, v in ruleset.items() if v is not None}) diff --git a/ddtrace/contrib/pytest/_plugin_v1.py b/ddtrace/contrib/pytest/_plugin_v1.py index c4c5648a086..f508f89795b 100644 --- a/ddtrace/contrib/pytest/_plugin_v1.py +++ b/ddtrace/contrib/pytest/_plugin_v1.py @@ -14,6 +14,7 @@ from doctest import DocTest import json import os +from pathlib import Path import re from typing import Dict # noqa:F401 @@ -31,6 +32,8 @@ from ddtrace.contrib.pytest.constants import FRAMEWORK from ddtrace.contrib.pytest.constants import KIND from ddtrace.contrib.pytest.constants import XFAIL_REASON +from ddtrace.contrib.pytest.utils import _is_pytest_8_or_later +from ddtrace.contrib.pytest.utils import _pytest_version_supports_itr from ddtrace.contrib.unittest import unpatch as unpatch_unittest from ddtrace.ext import SpanTypes from ddtrace.ext import test @@ -75,12 +78,6 @@ COVER_SESSION = asbool(os.environ.get("_DD_COVER_SESSION", "false")) -def _is_pytest_8_or_later(): - if hasattr(pytest, "version_tuple"): - return pytest.version_tuple >= (8, 0, 0) - return False - - def encode_test_parameter(parameter): param_repr = repr(parameter) # if the representation includes an id() we'll remove it @@ -868,13 +865,20 @@ def pytest_ddtrace_get_item_test_name(item): return "%s.%s" % (item.cls.__name__, item.name) return item.name - @staticmethod - @pytest.hookimpl(trylast=True) - def pytest_terminal_summary(terminalreporter, exitstatus, config): - # Reports coverage if experimental session-level coverage is enabled. 
- if USE_DD_COVERAGE and COVER_SESSION: - ModuleCodeCollector.report() - try: - ModuleCodeCollector.write_json_report_to_file("dd_coverage.json") - except Exception: - log.debug("Failed to write coverage report to file", exc_info=True) + # Internal coverage is only used for ITR at the moment, so the hook is only added if the pytest version supports it + if _pytest_version_supports_itr(): + + @staticmethod + @pytest.hookimpl(trylast=True) + def pytest_terminal_summary(terminalreporter, exitstatus, config): + # Reports coverage if experimental session-level coverage is enabled. + if USE_DD_COVERAGE and COVER_SESSION: + from ddtrace.ext.git import extract_workspace_path + + workspace_path = Path(extract_workspace_path()) + + ModuleCodeCollector.report(workspace_path) + try: + ModuleCodeCollector.write_json_report_to_file("dd_coverage.json", workspace_path) + except Exception: + log.debug("Failed to write coverage report to file", exc_info=True) diff --git a/ddtrace/contrib/pytest/constants.py b/ddtrace/contrib/pytest/constants.py index 3a8b064aef9..79894626b6e 100644 --- a/ddtrace/contrib/pytest/constants.py +++ b/ddtrace/contrib/pytest/constants.py @@ -4,3 +4,5 @@ # XFail Reason XFAIL_REASON = "pytest.xfail.reason" + +ITR_MIN_SUPPORTED_VERSION = (6, 8, 0) diff --git a/ddtrace/contrib/pytest/plugin.py b/ddtrace/contrib/pytest/plugin.py index 5da501abe40..f363a113eaf 100644 --- a/ddtrace/contrib/pytest/plugin.py +++ b/ddtrace/contrib/pytest/plugin.py @@ -12,10 +12,13 @@ """ import os +from pathlib import Path from typing import Dict # noqa:F401 import pytest +from ddtrace.contrib.pytest.utils import _pytest_version_supports_itr + DDTRACE_HELP_MSG = "Enable tracing of pytest functions." NO_DDTRACE_HELP_MSG = "Disable tracing of pytest functions." @@ -24,14 +27,21 @@ def _is_enabled_early(early_config): - """Hackily checks if the ddtrace plugin is enabled before the config is fully populated. + """Checks if the ddtrace plugin is enabled before the config is fully populated.
+ + This is necessary because the module watchdog for coverage collection needs to be enabled as early as possible. - This is necessary because the module watchdog for coverage collectio needs to be enabled as early as possible. + Note: since coverage is used for ITR purposes, we only check if the plugin is enabled if the pytest version supports + ITR """ + if not _pytest_version_supports_itr(): + return False + if ( "--no-ddtrace" in early_config.invocation_params.args - or early_config.getini("ddtrace") is False or early_config.getini("no-ddtrace") + or "ddtrace" in early_config.inicfg + and early_config.getini("ddtrace") is False ): return False @@ -97,10 +107,15 @@ def pytest_load_initial_conftests(early_config, parser, args): COVER_SESSION = asbool(os.environ.get("_DD_COVER_SESSION", "false")) if USE_DD_COVERAGE: + from ddtrace.ext.git import extract_workspace_path from ddtrace.internal.coverage.code import ModuleCodeCollector + workspace_path = Path(extract_workspace_path()) + + log.debug("Installing ModuleCodeCollector with include_paths=%s", [workspace_path]) + if not ModuleCodeCollector.is_installed(): - ModuleCodeCollector.install() + ModuleCodeCollector.install(include_paths=[workspace_path]) if COVER_SESSION: ModuleCodeCollector.start_coverage() else: diff --git a/ddtrace/contrib/pytest/utils.py b/ddtrace/contrib/pytest/utils.py new file mode 100644 index 00000000000..19d1bc3b166 --- /dev/null +++ b/ddtrace/contrib/pytest/utils.py @@ -0,0 +1,17 @@ +import pytest + +from ddtrace.contrib.pytest.constants import ITR_MIN_SUPPORTED_VERSION + + +def _get_pytest_version_tuple(): + if hasattr(pytest, "version_tuple"): + return pytest.version_tuple + return tuple(int(part) for part in pytest.__version__.split(".")[:3] if part.isdigit()) + + +def _is_pytest_8_or_later(): + return _get_pytest_version_tuple() >= (8, 0, 0) + + +def _pytest_version_supports_itr(): + return _get_pytest_version_tuple() >= ITR_MIN_SUPPORTED_VERSION diff --git a/ddtrace/internal/coverage/code.py
b/ddtrace/internal/coverage/code.py index f6c126a3f59..7d2aa4dfc8e 100644 --- a/ddtrace/internal/coverage/code.py +++ b/ddtrace/internal/coverage/code.py @@ -1,5 +1,6 @@ from collections import defaultdict from collections import deque +import os from types import CodeType from types import ModuleType import typing as t @@ -14,8 +15,6 @@ from ddtrace.vendor.contextvars import ContextVar -CWD = Path.cwd() - _original_exec = exec ctx_covered = ContextVar("ctx_covered", default=None) @@ -61,6 +60,7 @@ def __init__(self): self.coverage_enabled = False self.lines = defaultdict(set) self.covered = defaultdict(set) + self._include_paths: t.List[Path] = [] # Replace the built-in exec function with our own in the pytest globals try: @@ -70,6 +70,19 @@ def __init__(self): except ImportError: pass + @classmethod + def install(cls, include_paths: t.Optional[t.List[Path]] = None, coverage_queue=None): + if ModuleCodeCollector.is_installed(): + return + + super().install() + + if not include_paths: + include_paths = [Path(os.getcwd())] + + if cls._instance is not None: + cls._instance._include_paths = include_paths + def hook(self, arg): path, line = arg if self.coverage_enabled: @@ -84,7 +97,7 @@ def hook(self, arg): ctx_lines.add(line) @classmethod - def report(cls, ignore_nocover: bool = False): + def report(cls, workspace_path: Path, ignore_nocover: bool = False): if cls._instance is None: return instance: ModuleCodeCollector = cls._instance @@ -92,10 +105,10 @@ def report(cls, ignore_nocover: bool = False): executable_lines = instance.lines covered_lines = instance._get_covered_lines() - print_coverage_report(executable_lines, covered_lines, ignore_nocover=ignore_nocover) + print_coverage_report(executable_lines, covered_lines, workspace_path, ignore_nocover=ignore_nocover) @classmethod - def write_json_report_to_file(cls, filename: str, ignore_nocover: bool = False): + def write_json_report_to_file(cls, filename: str, workspace_path: Path, ignore_nocover: bool = False): if 
cls._instance is None: return instance: ModuleCodeCollector = cls._instance @@ -104,7 +117,7 @@ def write_json_report_to_file(cls, filename: str, ignore_nocover: bool = False): covered_lines = instance._get_covered_lines() with open(filename, "w") as f: - f.write(get_json_report(executable_lines, covered_lines, ignore_nocover=ignore_nocover)) + f.write(get_json_report(executable_lines, covered_lines, workspace_path, ignore_nocover=ignore_nocover)) def _get_covered_lines(self) -> t.Dict[str, t.Set[int]]: if ctx_coverage_enabed.get(False): @@ -171,9 +184,9 @@ def report_seen_lines(cls): return files def transform(self, code: CodeType, _module: ModuleType) -> CodeType: - code_path = Path(code.co_filename).resolve() - # TODO: Remove hardcoded paths - if not code_path.is_relative_to(CWD): + code_path = Path(code.co_filename) + + if not any(code_path.is_relative_to(include_path) for include_path in self._include_paths): # Not a code object we want to instrument return code @@ -201,13 +214,11 @@ def instrument_code(self, code: CodeType) -> CodeType: return code self.seen.add(code) - path = str(Path(code.co_filename).resolve().relative_to(CWD)) - - new_code, lines = instrument_all_lines(code, self.hook, path) + new_code, lines = instrument_all_lines(code, self.hook, code.co_filename) # Keep note of all the lines that have been instrumented. These will be # the ones that can be covered. 
- self.lines[path] |= lines + self.lines[code.co_filename] |= lines return new_code diff --git a/ddtrace/internal/coverage/instrumentation.py b/ddtrace/internal/coverage/instrumentation.py index 63d778cd09b..32e7b32cda7 100644 --- a/ddtrace/internal/coverage/instrumentation.py +++ b/ddtrace/internal/coverage/instrumentation.py @@ -15,14 +15,15 @@ def instrument_all_lines(code: CodeType, hook: HookType, path: str) -> t.Tuple[C last_lineno = None for i, instr in enumerate(abstract_code): try: + if instr.lineno is None: + continue + if instr.lineno == last_lineno: continue last_lineno = instr.lineno - if last_lineno is None: - continue - if instr.name in ("NOP", "RESUME"): + if instr.name == "RESUME": continue # Inject the hook at the beginning of the line diff --git a/ddtrace/internal/coverage/report.py b/ddtrace/internal/coverage/report.py index 870886994d2..252b3ce99ae 100644 --- a/ddtrace/internal/coverage/report.py +++ b/ddtrace/internal/coverage/report.py @@ -2,6 +2,7 @@ import json import linecache import os +from pathlib import Path import re import typing as t @@ -18,6 +19,17 @@ ast_cache: t.Dict[str, t.Any] = {} +def _get_relative_path_strings(executable_lines, workspace_path: Path) -> t.Dict[str, str]: + relative_path_strs: t.Dict[str, str] = {} + + for path in executable_lines: + path_obj = Path(path) + path_str = str(path_obj.relative_to(workspace_path) if path_obj.is_relative_to(workspace_path) else path_obj) + relative_path_strs[path] = path_str + + return relative_path_strs + + def _get_ast_for_path(path: str): if path not in ast_cache: with open(path, "r") as f: @@ -64,11 +76,18 @@ def no_cover(path, src_line) -> t.Optional[t.Tuple[int, int]]: return None -def print_coverage_report(executable_lines, covered_lines, ignore_nocover=False): +def print_coverage_report(executable_lines, covered_lines, workspace_path: Path, ignore_nocover=False): total_executable_lines = 0 total_covered_lines = 0 total_missed_lines = 0 - n = max(len(path) for path in 
executable_lines) + 4 + + if len(executable_lines) == 0: + print("No Datadog line coverage recorded.") + return + + relative_path_strs: t.Dict[str, str] = _get_relative_path_strings(executable_lines, workspace_path) + + n = max(len(path_str) for path_str in relative_path_strs.values()) + 4 covered_lines = covered_lines @@ -104,14 +123,16 @@ def print_coverage_report(executable_lines, covered_lines, ignore_nocover=False) missed_ranges = collapse_ranges(sorted(path_lines - path_covered)) missed = ",".join([f"{start}-{end}" if start != end else str(start) for start, end in missed_ranges]) missed_str = f" [{missed}]" if missed else "" - print(f"{path:{n}s}{n_lines:>8}{n_missed:>8}{int(n_covered / n_lines * 100):>8}%{missed_str}") + print( + f"{relative_path_strs[path]:{n}s}{n_lines:>8}{n_missed:>8}{int(n_covered / n_lines * 100):>8}%{missed_str}" + ) print("-" * (w)) total_covered_percent = int((total_covered_lines / total_executable_lines) * 100) print(f"{'TOTAL':<{n}}{total_executable_lines:>8}{total_missed_lines:>8}{total_covered_percent:>8}%") print() -def get_json_report(executable_lines, covered_lines, ignore_nocover=False): +def get_json_report(executable_lines, covered_lines, workspace_path: Path, ignore_nocover=False): """Writes a JSON-formatted coverage report similar in structure to coverage.py 's JSON report, but only containing a subset (namely file-level executed and missing lines). 
@@ -126,7 +147,9 @@ def get_json_report(executable_lines, covered_lines, ignore_nocover=False): } """ - output = {"files": {}} + output: t.Dict[str, t.Any] = {"files": {}} + + relative_path_strs: t.Dict[str, str] = _get_relative_path_strings(executable_lines, workspace_path) for path, orig_lines in sorted(executable_lines.items()): path_lines = orig_lines.copy() @@ -142,7 +165,7 @@ def get_json_report(executable_lines, covered_lines, ignore_nocover=False): path_lines.discard(no_cover_line) path_covered.discard(no_cover_line) - output["files"][path] = { + output["files"][relative_path_strs[path]] = { "executed_lines": sorted(list(path_covered)), "missing_lines": sorted(list(path_lines - path_covered)), } diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 58c849adbc0..0476e91803b 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -2,7 +2,6 @@ import os from typing import Any from typing import Dict -from typing import List from typing import Optional from typing import Union @@ -50,11 +49,7 @@ log = get_logger(__name__) -SUPPORTED_INTEGRATIONS = { - "bedrock": lambda: patch(botocore=True), - "langchain": lambda: patch(langchain=True), - "openai": lambda: patch(openai=True), -} +SUPPORTED_LLMOBS_INTEGRATIONS = {"bedrock": "botocore", "openai": "openai", "langchain": "langchain"} class LLMObs(Service): @@ -105,7 +100,7 @@ def _stop_service(self) -> None: def enable( cls, ml_app: Optional[str] = None, - integrations: Optional[List[str]] = None, + integrations_enabled: bool = True, agentless_enabled: bool = False, site: Optional[str] = None, api_key: Optional[str] = None, @@ -117,8 +112,7 @@ def enable( Enable LLM Observability tracing. :param str ml_app: The name of your ml application. - :param List[str] integrations: A list of integrations to enable auto-tracing for. - Must be subset of ("openai", "langchain", "bedrock") + :param bool integrations_enabled: Set to `true` to enable LLM integrations. 
:param bool agentless_enabled: Set to `true` to disable sending data that requires a Datadog Agent. :param str site: Your datadog site. :param str api_key: Your datadog api key. @@ -170,7 +164,8 @@ def enable( log.debug("Remote configuration disabled because DD_LLMOBS_AGENTLESS_ENABLED is set to true.") remoteconfig_poller.disable() - cls._patch_integrations(integrations) + if integrations_enabled: + cls._patch_integrations() # override the default _instance with a new tracer cls._instance = cls(tracer=_tracer) cls.enabled = True @@ -207,30 +202,10 @@ def flush(cls): log.warning("Failed to flush LLMObs spans and evaluation metrics.", exc_info=True) @staticmethod - def _patch_integrations(integrations: Optional[List[str]] = None): - """ - Patch LLM integrations based on a list of integrations passed in. Patch all supported integrations by default. - """ - integrations_to_patch = {} - if integrations is None: - integrations_to_patch.update(SUPPORTED_INTEGRATIONS) - else: - for integration in integrations: - integration = integration.lower() - if integration in SUPPORTED_INTEGRATIONS: - integrations_to_patch.update({integration: SUPPORTED_INTEGRATIONS[integration]}) - else: - log.warning( - "%s is unsupported - LLMObs currently supports %s", - integration, - str(SUPPORTED_INTEGRATIONS.keys()), - ) - for integration in integrations_to_patch: - try: - SUPPORTED_INTEGRATIONS[integration]() - except Exception: - log.warning("couldn't patch %s", integration, exc_info=True) - return + def _patch_integrations() -> None: + """Patch LLM integrations.""" + patch(**{integration: True for integration in SUPPORTED_LLMOBS_INTEGRATIONS.values()}) # type: ignore[arg-type] + log.debug("Patched LLM integrations: %s", list(SUPPORTED_LLMOBS_INTEGRATIONS.values())) @classmethod def export_span(cls, span: Optional[Span] = None) -> Optional[ExportedLLMObsSpan]: diff --git a/lib-injection/Dockerfile b/lib-injection/Dockerfile index 57240507d46..d15336e27a8 100644 --- 
a/lib-injection/Dockerfile +++ b/lib-injection/Dockerfile @@ -24,7 +24,7 @@ RUN python3 dl_wheels.py \ --output-dir /build/pkgs \ --verbose -FROM alpine:3.18.3 +FROM alpine:3.20 COPY --from=0 /build/pkgs /datadog-init/ddtrace_pkgs ARG UID=10000 RUN addgroup -g 10000 -S datadog && \ diff --git a/releasenotes/notes/actions_from_RC-08744fc9e91a8c02.yaml b/releasenotes/notes/actions_from_RC-08744fc9e91a8c02.yaml new file mode 100644 index 00000000000..08acc26573a --- /dev/null +++ b/releasenotes/notes/actions_from_RC-08744fc9e91a8c02.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + ASM: This fix resolves an issue where an org could not customize actions through remote config. diff --git a/releasenotes/notes/lib-inject-base-image-dffa9a9579a9350d.yaml b/releasenotes/notes/lib-inject-base-image-dffa9a9579a9350d.yaml new file mode 100644 index 00000000000..ae2964b581c --- /dev/null +++ b/releasenotes/notes/lib-inject-base-image-dffa9a9579a9350d.yaml @@ -0,0 +1,4 @@ +--- +other: + - | + lib-injection: update base Alpine image to 3.20. diff --git a/scripts/iast/README b/scripts/iast/README index f2f2a0af840..5340b0723f6 100644 --- a/scripts/iast/README +++ b/scripts/iast/README @@ -1,6 +1,6 @@ This folder (scripts/iast/) contains some scripts to check memory usage of native code. -## How to use +## Memory Leaks, How to ### 1. Build the docker image @@ -75,3 +75,72 @@ The valid traces of our C files, are like that: ==324555== by 0x1FFEFEFAFF: ??? ==324555== ``` + +## Segmentation fault, How to + +Have you been blessed by a Segmentation Fault? Have you got an error like...? + +```sh +riot run --python=3.11 -r flask +.... +tests/contrib/flask/test_blueprint.py ....... [ 9%] +tests/contrib/flask/test_errorhandler.py ..... [ 15%] +tests/contrib/flask/test_flask_appsec.py Test failed with exit code -11 +``` + +### 1. Compile the project in debug mode + +```sh +export DD_COMPILE_DEBUG=true +python setup.py build_ext --inplace +``` + +### 2. 
Run the tests with GDB + +```sh +DD_TRACE_AGENT_URL=http://127.0.0.1:8126/ gdb --args python -m pytest tests/appsec +``` + +When the application raises a Segmentation fault, GDB will stop the execution, type backtrace and… + +```sh +Program received signal SIGSEGV, Segmentation fault. +0x00007ffff57caa00 in std::_Hash_bytes(void const*, unsigned long, unsigned long) () from /lib/x86_64-linux-gnu/libstdc++.so.6 +(gdb) backtrace +#0 0x00007ffff57caa00 in std::_Hash_bytes(void const*, unsigned long, unsigned long) () from /lib/x86_64-linux-gnu/libstdc++.so.6 +#1 0x00007ffff59f35b7 in std::pair > const, ddwaf::parameter>, false, true>, bool> std::_Hashtable >, std::pair > const, ddwaf::parameter>, std::allocator > const, ddwaf::parameter> >, std::__detail::_Select1st, std::equal_to > >, std::hash > >, std::__detail::_Mod_range_hashing, std::__detail::_Default_ranged_hash, std::__detail::_Prime_rehash_policy, std::__detail::_Hashtable_traits >::_M_emplace >, ddwaf::parameter const&>(std::integral_constant, std::basic_string_view >&&, ddwaf::parameter const&) () from ddtrace/appsec/_ddwaf.so +#2 0x00007ffff59f27e5 in ddwaf::parameter::operator std::unordered_map >, ddwaf::parameter, std::hash > >, std::equal_to > >, std::allocator > const, ddwaf::parameter> > >() () from ddtrace/appsec/_ddwaf.so +#3 0x00007ffff59c8e94 in ddwaf::parser::parse(ddwaf::parameter, ddwaf::ruleset_info&, std::vector >&, PWManifest&, std::unordered_map, std::allocator >, std::vector, std::allocator > >, std::hash, std::allocator > >, std::equal_to, std::allocator > >, std::allocator, std::allocator > const, std::vector, std::allocator > > > > >&) () from ddtrace/appsec/_ddwaf.so +#4 0x00007ffff59aa1ae in PowerWAF::fromConfig(_ddwaf_object, _ddwaf_config const*, ddwaf::ruleset_info&) () from ddtrace/appsec/_ddwaf.so +#5 0x00007ffff5995045 in ddwaf_init () from ddtrace/appsec/_ddwaf.so +``` + +Pray to Linus Torvalds to understand the error ¯\_(ツ)_/¯ + +### 2.1 Common errors + +Linux users have 
restrictions on ptrace: + +``` +Starting program: /home/alberto.vara/.pyenv/versions/3.8.13/envs/dd-trace-py-38/bin/python -m pytest tests/appsec +warning: Could not trace the inferior process. +warning: ptrace: Operation not permitted +During startup program exited with code 127. +``` +You can temporarily disable this restriction (and revert to the old behaviour allowing your user to ptrace (gdb) any of their other processes) by doing: + +``` +echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +``` + +To permanently allow it, edit /etc/sysctl.d/10-ptrace.conf and change the line: + +``` +kernel.yama.ptrace_scope = 1 +``` + +To read: + +``` +kernel.yama.ptrace_scope = 0 +``` diff --git a/tests/appsec/appsec/test_remoteconfiguration.py b/tests/appsec/appsec/test_remoteconfiguration.py index 13efed5e36a..b67cecd8ad5 100644 --- a/tests/appsec/appsec/test_remoteconfiguration.py +++ b/tests/appsec/appsec/test_remoteconfiguration.py @@ -985,9 +985,22 @@ def test_rc_rules_data(tracer): config = { "rules_data": [], "custom_rules": [], + "actions": [], "rules": json.load(dd_rules)["rules"], + "rules_override": [], + "scanners": [], + "processors": [], + "ignore": [], } - assert _appsec_rules_data(config, tracer) + with mock.patch("ddtrace.appsec._processor.AppSecSpanProcessor._update_rules", autospec=True) as mock_update: + mock_update.reset_mock() + _appsec_rules_data(config, tracer) + calls = mock_update.mock_calls + for v in config: + if v == "ignore": + assert v not in calls[-1][1][1] + else: + assert v in calls[-1][1][1] def test_rc_rules_data_error_empty(tracer): diff --git a/tests/contrib/botocore/test_bedrock.py b/tests/contrib/botocore/test_bedrock.py index d2f727f0372..bfdbb25a844 100644 --- a/tests/contrib/botocore/test_bedrock.py +++ b/tests/contrib/botocore/test_bedrock.py @@ -488,7 +488,7 @@ def _test_llmobs_invoke(cls, provider, bedrock_client, mock_llmobs_span_writer, pin.override(bedrock_client, tracer=mock_tracer) # Need to disable and re-enable LLMObs
service to use the mock tracer LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["bedrock"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want botocore patched if cassette_name is None: cassette_name = "%s_invoke.yaml" % provider @@ -524,7 +524,7 @@ def _test_llmobs_invoke_stream( pin.override(bedrock_client, tracer=mock_tracer) # Need to disable and re-enable LLMObs service to use the mock tracer LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["bedrock"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want botocore patched if cassette_name is None: cassette_name = "%s_invoke_stream.yaml" % provider @@ -624,7 +624,7 @@ def test_llmobs_error(self, ddtrace_global_config, bedrock_client, mock_llmobs_s pin.override(bedrock_client, tracer=mock_tracer) # Need to disable and re-enable LLMObs service to use the mock tracer LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["bedrock"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want botocore patched with pytest.raises(botocore.exceptions.ClientError): with request_vcr.use_cassette("meta_invoke_error.yaml"): body, model = json.dumps(_REQUEST_BODIES["meta"]), _MODELS["meta"] diff --git a/tests/contrib/langchain/test_langchain.py b/tests/contrib/langchain/test_langchain.py index 156a1bf79da..eb07c7e0b04 100644 --- a/tests/contrib/langchain/test_langchain.py +++ b/tests/contrib/langchain/test_langchain.py @@ -1352,7 +1352,7 @@ def _test_llmobs_llm_invoke( different_py39_cassette=False, ): LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["langchain"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want langchain patched if sys.version_info < (3, 10, 0) and different_py39_cassette: cassette_name = cassette_name.replace(".yaml", "_39.yaml") @@ -1388,7 +1388,7 @@ def _test_llmobs_chain_invoke( ): # disable the service before re-enabling it, as it was 
enabled in another test LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["langchain"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want langchain patched if sys.version_info < (3, 10, 0) and different_py39_cassette: cassette_name = cassette_name.replace(".yaml", "_39.yaml") diff --git a/tests/contrib/langchain/test_langchain_community.py b/tests/contrib/langchain/test_langchain_community.py index 35f56de208b..88f4223db7c 100644 --- a/tests/contrib/langchain/test_langchain_community.py +++ b/tests/contrib/langchain/test_langchain_community.py @@ -1339,7 +1339,7 @@ def _test_llmobs_llm_invoke( output_role=None, ): LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["langchain"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want langchain patched with request_vcr.use_cassette(cassette_name): generate_trace("Can you explain what an LLM chain is?") @@ -1372,7 +1372,7 @@ def _test_llmobs_chain_invoke( ): # disable the service before re-enabling it, as it was enabled in another test LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["langchain"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) # only want langchain patched with request_vcr.use_cassette(cassette_name): generate_trace("Can you explain what an LLM chain is?") diff --git a/tests/contrib/openai/conftest.py b/tests/contrib/openai/conftest.py index f07e64787fa..f53d2b672d2 100644 --- a/tests/contrib/openai/conftest.py +++ b/tests/contrib/openai/conftest.py @@ -191,7 +191,7 @@ def mock_tracer(ddtrace_global_config, openai, patch_openai, mock_logs, mock_met if ddtrace_global_config.get("_llmobs_enabled", False): # Have to disable and re-enable LLMObs to use to mock tracer. 
LLMObs.disable() - LLMObs.enable(_tracer=mock_tracer, integrations=["openai"]) + LLMObs.enable(_tracer=mock_tracer, integrations_enabled=False) yield mock_tracer diff --git a/tests/contrib/openai/test_openai_v0.py b/tests/contrib/openai/test_openai_v0.py index 0175c8cad0d..ea042e7358f 100644 --- a/tests/contrib/openai/test_openai_v0.py +++ b/tests/contrib/openai/test_openai_v0.py @@ -1935,7 +1935,6 @@ def test_integration_service_name(openai_api_key, ddtrace_run_python_code_in_sub ) def test_llmobs_completion(openai_vcr, openai, ddtrace_global_config, mock_llmobs_writer, mock_tracer): """Ensure llmobs records are emitted for completion endpoints when configured. - Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation. """ with openai_vcr.use_cassette("completion.yaml"): @@ -1990,7 +1989,6 @@ def test_llmobs_completion_stream(openai_vcr, openai, ddtrace_global_config, moc ) def test_llmobs_chat_completion(openai_vcr, openai, ddtrace_global_config, mock_llmobs_writer, mock_tracer): """Ensure llmobs records are emitted for chat completion endpoints when configured. - Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation. """ if not hasattr(openai, "ChatCompletion"): @@ -2033,7 +2031,6 @@ async def test_llmobs_chat_completion_stream( openai_vcr, openai, ddtrace_global_config, mock_llmobs_writer, mock_tracer ): """Ensure llmobs records are emitted for chat completion endpoints when configured. - Also ensure the llmobs records have the correct tagging including trace/span ID for trace correlation. """ if not hasattr(openai, "ChatCompletion"):