From e40d29f7e6094d0692b0a3eb9f149ec147771774 Mon Sep 17 00:00:00 2001 From: Christophe Papazian <114495376+christophe-papazian@users.noreply.github.com> Date: Thu, 21 Mar 2024 17:11:02 +0100 Subject: [PATCH] chore(asm): add persistent and ephemeral addresses handling for the waf (#8734) Previously, all waf addresses were persistent, meaning that we only needed to send them once to the waf per request. With the introduction of new features, ephemeral addresses will become more and more common, and they need to be resent each time to the waf. This PR address that by keeping a list of persistent addresses and treat all other addresses as ephemeral. Required for https://github.com/DataDog/dd-trace-py/pull/8568 ## Checklist - [x] Change(s) are motivated and described in the PR description - [x] Testing strategy is described if automated tests are not included in the PR - [x] Risks are described (performance impact, potential for breakage, maintainability) - [x] Change is maintainable (easy to change, telemetry, documentation) - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed or label `changelog/no-changelog` is set - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)) - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) - [x] If this PR changes the public interface, I've notified `@DataDog/apm-tees`. - [x] If change touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. ## Reviewer Checklist - [ ] Title is accurate - [ ] All changes are related to the pull request's stated goal - [ ] Description motivates each change - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - [ ] Testing strategy adequately addresses listed risks - [ ] Change is maintainable (easy to change, telemetry, documentation) - [ ] Release note makes sense to a user of the library - [ ] Author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --- ddtrace/appsec/_constants.py | 20 ++++++++ ddtrace/appsec/_processor.py | 9 ++-- tests/appsec/appsec/test_processor.py | 73 +++++++++++++++++++-------- 3 files changed, 76 insertions(+), 26 deletions(-) diff --git a/ddtrace/appsec/_constants.py b/ddtrace/appsec/_constants.py index b8abd36fef9..7e0ed9342ad 100644 --- a/ddtrace/appsec/_constants.py +++ b/ddtrace/appsec/_constants.py @@ -96,6 +96,7 @@ class IAST_SPAN_TAGS(metaclass=Constant_Class): class WAF_DATA_NAMES(metaclass=Constant_Class): """string names used by the waf library for requesting data from requests""" + # PERSISTENT ADDRESSES REQUEST_BODY = "server.request.body" REQUEST_QUERY = "server.request.query" REQUEST_HEADERS_NO_COOKIES = "server.request.headers.no_cookies" @@ -108,7 +109,26 @@ class WAF_DATA_NAMES(metaclass=Constant_Class): RESPONSE_STATUS = "server.response.status" RESPONSE_HEADERS_NO_COOKIES = "server.response.headers.no_cookies" RESPONSE_BODY = "server.response.body" + PERSISTENT_ADDRESSES = frozenset( + ( + REQUEST_BODY, + REQUEST_QUERY, + REQUEST_HEADERS_NO_COOKIES, + REQUEST_URI_RAW, + REQUEST_METHOD, + REQUEST_PATH_PARAMS, + REQUEST_COOKIES, + REQUEST_HTTP_IP, + REQUEST_USER_ID, + RESPONSE_STATUS, + RESPONSE_HEADERS_NO_COOKIES, + RESPONSE_BODY, + ) + ) + + # EPHEMERAL ADDRESSES PROCESSOR_SETTINGS = "waf.context.processor" + LFI_ADDRESS = "server.io.fs.file" class SPAN_DATA_NAMES(metaclass=Constant_Class): diff --git a/ddtrace/appsec/_processor.py b/ddtrace/appsec/_processor.py index 92a8e71585c..5d48d9efa78 100644 --- a/ddtrace/appsec/_processor.py +++ b/ddtrace/appsec/_processor.py @@ -283,13 +283,13 @@ def _waf_action( if data_already_sent is None: data_already_sent = set() - # type ignore because mypy seems to not detect that both results of the if - # above can iter if not None + # persistent addresses must be sent if api security is used force_keys = custom_data.get("PROCESSOR_SETTINGS", {}).get("extract-schema", False) if custom_data else False + for key, waf_name in iter_data: # type: ignore[attr-defined] if key in data_already_sent: continue - if self._is_needed(waf_name) or force_keys: + if self._is_needed(waf_name) or force_keys or waf_name not in WAF_DATA_NAMES.PERSISTENT_ADDRESSES: value = None if custom_data is not None and custom_data.get(key) is not None: value = custom_data.get(key) @@ -298,7 +298,8 @@ def _waf_action( # if value is a callable, it's a lazy value for api security that should not be sent now if value is not None and not hasattr(value, "__call__"): data[waf_name] = _transform_headers(value) if key.endswith("HEADERS_NO_COOKIES") else value - data_already_sent.add(key) + if waf_name in WAF_DATA_NAMES.PERSISTENT_ADDRESSES: + data_already_sent.add(key) log.debug("[action] WAF got value %s", SPAN_DATA_NAMES.get(key, key)) waf_results = self._ddwaf.run(ctx, data, asm_config._waf_timeout) diff --git a/tests/appsec/appsec/test_processor.py b/tests/appsec/appsec/test_processor.py index 50daf04b3ea..095faf5c787 100644 --- a/tests/appsec/appsec/test_processor.py +++ b/tests/appsec/appsec/test_processor.py @@ -9,6 +9,7 @@ from ddtrace.appsec import _asm_request_context from ddtrace.appsec._constants import APPSEC from ddtrace.appsec._constants import DEFAULT +from ddtrace.appsec._constants import WAF_DATA_NAMES from ddtrace.appsec._ddwaf import DDWaf from ddtrace.appsec._processor import AppSecSpanProcessor from ddtrace.appsec._processor import _transform_headers @@ -708,6 +709,28 @@ def test_asm_context_registration(tracer_appsec): assert core.get_item("asm_env") is None +CUSTOM_RULE_METHOD = { + "custom_rules": [ + { + "conditions": [ + { + "operator": "match_regex", + "parameters": { + "inputs": [{"address": "server.request.method"}], + "options": {"case_sensitive": False}, + "regex": "GET", + }, + } + ], + "id": "32b243c7-26eb-4046-adf4-custom", + "name": "test required", + "tags": {"category": "attack_attempt", "custom": "1", "type": "custom"}, + "transformers": [], + } + ] +} + + def test_required_addresses(): with override_env(dict(DD_APPSEC_RULES=rules.RULES_GOOD_PATH)): processor = AppSecSpanProcessor() @@ -724,28 +747,7 @@ def test_required_addresses(): "usr.id", } - processor._update_rules( - { - "custom_rules": [ - { - "conditions": [ - { - "operator": "match_regex", - "parameters": { - "inputs": [{"address": "server.request.method"}], - "options": {"case_sensitive": False}, - "regex": "GET", - }, - } - ], - "id": "32b243c7-26eb-4046-adf4-custom", - "name": "test required", - "tags": {"category": "attack_attempt", "custom": "1", "type": "custom"}, - "transformers": [], - } - ] - } - ) + processor._update_rules(CUSTOM_RULE_METHOD) assert processor._addresses_to_keep == { "grpc.server.request.message", @@ -759,3 +761,30 @@ def test_required_addresses(): "server.response.headers.no_cookies", "usr.id", } + + +@pytest.mark.parametrize( + "persistent", [key for key, value in WAF_DATA_NAMES if value in WAF_DATA_NAMES.PERSISTENT_ADDRESSES] +) +@pytest.mark.parametrize("ephemeral", ["LFI_ADDRESS", "PROCESSOR_SETTINGS"]) +@mock.patch("ddtrace.appsec._ddwaf.DDWaf.run") +def test_ephemeral_addresses(mock_run, persistent, ephemeral): + from ddtrace import tracer + + processor = AppSecSpanProcessor() + processor._update_rules(CUSTOM_RULE_METHOD) + + with override_global_config( + dict(_asm_enabled=True) + ), _asm_request_context.asm_request_context_manager(), tracer.trace("test", span_type=SpanTypes.WEB) as span: + # first call must send all data to the waf + processor._waf_action(span, None, {persistent: {"key_1": "value_1"}, ephemeral: {"key_2": "value_2"}}) + assert mock_run.call_args[0][1] == { + WAF_DATA_NAMES[persistent]: {"key_1": "value_1"}, + WAF_DATA_NAMES[ephemeral]: {"key_2": "value_2"}, + } + # second call must only send ephemeral data to the waf, not persistent data again + processor._waf_action(span, None, {persistent: {"key_1": "value_1"}, ephemeral: {"key_2": "value_3"}}) + assert mock_run.call_args[0][1] == { + WAF_DATA_NAMES[ephemeral]: {"key_2": "value_3"}, + }