Skip to content

Commit

Permalink
feat(tracing): make tracer._sampler public (#8572)
Browse files Browse the repository at this point in the history
In anticipation of lazy sampling, users may create egress points that we
cannot account for, and therefore miss sampling the span on. For
example, an integration that where a user is manually injecting trace
headers into the outgoing request will miss calling
tracer.sampler.sample() which could lead to broken traces. To avoid that
we'll make sure to advise users when to call sampler in the lazy
sampling PR: #8308

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.
- [x] If change touches code that signs or publishes builds or packages,
or handles credentials of any kind, I've requested a review from
`@DataDog/security-design-and-guidance`.

## Reviewer Checklist

- [ ] Title is accurate
- [ ] All changes are related to the pull request's stated goal
- [ ] Description motivates each change
- [ ] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [ ] Testing strategy adequately addresses listed risks
- [ ] Change is maintainable (easy to change, telemetry, documentation)
- [ ] Release note makes sense to a user of the library
- [ ] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [ ] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
ZStriker19 authored Mar 8, 2024
1 parent 3dd4c9d commit 41fb9fd
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 23 deletions.
16 changes: 8 additions & 8 deletions ddtrace/_trace/tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def __init__(
self.enabled = config._tracing_enabled
self.context_provider = context_provider or DefaultContextProvider()
self._user_sampler: Optional[BaseSampler] = None
self._sampler: BaseSampler = DatadogSampler()
self.sampler: BaseSampler = DatadogSampler()
self._dogstatsd_url = agent.get_stats_url() if dogstatsd_url is None else dogstatsd_url
self._compute_stats = config._trace_compute_stats
self._agent_url: str = agent.get_trace_url() if url is None else url
Expand Down Expand Up @@ -415,8 +415,8 @@ def configure(
self._iast_enabled = asm_config._iast_enabled = iast_enabled

if sampler is not None:
self._sampler = sampler
self._user_sampler = self._sampler
self.sampler = sampler
self._user_sampler = self.sampler

self._dogstatsd_url = dogstatsd_url or self._dogstatsd_url

Expand Down Expand Up @@ -519,8 +519,8 @@ def _agent_response_callback(self, resp):
The agent can return updated sample rates for the priority sampler.
"""
try:
if isinstance(self._sampler, BasePrioritySampler):
self._sampler.update_rate_by_service_sample_rates(
if isinstance(self.sampler, BasePrioritySampler):
self.sampler.update_rate_by_service_sample_rates(
resp.rate_by_service,
)
except ValueError:
Expand Down Expand Up @@ -752,7 +752,7 @@ def _start_span(
self._services.add(service)

if not trace_id:
self._sampler.sample(span)
self.sampler.sample(span)

# Only call span processors if the tracer is enabled
if self.enabled:
Expand Down Expand Up @@ -1072,15 +1072,15 @@ def _on_global_config_update(self, cfg, items):
if "_trace_sample_rate" in items:
# Reset the user sampler if one exists
if cfg._get_source("_trace_sample_rate") != "remote_config" and self._user_sampler:
self._sampler = self._user_sampler
self.sampler = self._user_sampler
return

if cfg._get_source("_trace_sample_rate") != "default":
sample_rate = cfg._trace_sample_rate
else:
sample_rate = None
sampler = DatadogSampler(default_sample_rate=sample_rate)
self._sampler = sampler
self.sampler = sampler

if "tags" in items:
self._tags = cfg.tags.copy()
Expand Down
6 changes: 3 additions & 3 deletions ddtrace/internal/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def collect(tracer):
agent_error = None

sampler_rules = None
if isinstance(tracer._sampler, DatadogSampler):
sampler_rules = [str(rule) for rule in tracer._sampler.rules]
if isinstance(tracer.sampler, DatadogSampler):
sampler_rules = [str(rule) for rule in tracer.sampler.rules]

is_venv = in_venv()

Expand Down Expand Up @@ -138,7 +138,7 @@ def collect(tracer):
is_global_tracer=tracer == ddtrace.tracer,
enabled_env_setting=os.getenv("DATADOG_TRACE_ENABLED"),
tracer_enabled=tracer.enabled,
sampler_type=type(tracer._sampler).__name__ if tracer._sampler else "N/A",
sampler_type=type(tracer.sampler).__name__ if tracer.sampler else "N/A",
priority_sampler_type="N/A",
sampler_rules=sampler_rules,
service=ddtrace.config.service or "",
Expand Down
6 changes: 6 additions & 0 deletions releasenotes/notes/sampler_public-0570e2ce18bc604b.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
features:
- |
tracing: Makes ``tracer.sampler`` public, now named ``tracer.sampler``. This should not be called
in most circumstances, but will be useful for advanced users who want to access the sampler directly,
once lazy sampling is implemented.
10 changes: 5 additions & 5 deletions tests/integration/test_priority_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _prime_tracer_with_priority_sample_rate_from_agent(t, service, env):
t.flush()

sampler_key = "service:{},env:{}".format(service, env)
while sampler_key not in t._writer._sampler._by_service_samplers:
while sampler_key not in t._writer.sampler._by_service_samplers:
time.sleep(1)
s = t.trace("operation", service=service)
s.finish()
Expand Down Expand Up @@ -70,9 +70,9 @@ def test_priority_sampling_rate_honored():

_prime_tracer_with_priority_sample_rate_from_agent(t, service, env)
sampler_key = "service:{},env:{}".format(service, env)
assert sampler_key in t._writer._sampler._by_service_samplers
assert sampler_key in t._writer.sampler._by_service_samplers

rate_from_agent = t._writer._sampler._by_service_samplers[sampler_key].sample_rate
rate_from_agent = t._writer.sampler._by_service_samplers[sampler_key].sample_rate
assert 0 < rate_from_agent < 1

_turn_tracer_into_dummy(t)
Expand Down Expand Up @@ -103,10 +103,10 @@ def test_priority_sampling_response():
with override_global_config(dict(env=env)):
service = "my-svc-{}".format(_id)
sampler_key = "service:{},env:{}".format(service, env)
assert sampler_key not in t._writer._sampler._by_service_samplers
assert sampler_key not in t._writer.sampler._by_service_samplers
_prime_tracer_with_priority_sample_rate_from_agent(t, service, env)
assert (
sampler_key in t._writer._sampler._by_service_samplers
sampler_key in t._writer.sampler._by_service_samplers
), "after fetching priority sample rates from the agent, the tracer should hold those rates"
t.shutdown()

Expand Down
14 changes: 7 additions & 7 deletions tests/tracer/test_sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def test_sample_rate_deviation(self):

# Since RateSampler does not set the sampling priority on a span, we will use a DatadogSampler
# with rate limiting disabled.
tracer._sampler = DatadogSampler(default_sample_rate=sample_rate, rate_limit=-1)
tracer.sampler = DatadogSampler(default_sample_rate=sample_rate, rate_limit=-1)

iterations = int(1e4 / sample_rate)

Expand Down Expand Up @@ -124,7 +124,7 @@ def test_deterministic_behavior(self):
tracer = DummyTracer()
# Since RateSampler does not set the sampling priority on a span, we will use a DatadogSampler
# with rate limiting disabled.
tracer._sampler = DatadogSampler(default_sample_rate=0.5, rate_limit=-1)
tracer.sampler = DatadogSampler(default_sample_rate=0.5, rate_limit=-1)

for i in range(10):
span = tracer.trace(str(i))
Expand All @@ -137,20 +137,20 @@ def test_deterministic_behavior(self):
sampled = len(samples) == 1 and samples[0].context.sampling_priority > 0
for _ in range(10):
other_span = Span(str(i), trace_id=span.trace_id)
assert sampled == tracer._sampler.sample(
assert sampled == tracer.sampler.sample(
other_span
), "sampling should give the same result for a given trace_id"

def test_negative_sample_rate_raises_error(self):
tracer = DummyTracer()
with pytest.raises(ValueError, match="sample_rate of -0.5 is negative"):
tracer._sampler = RateSampler(sample_rate=-0.5)
tracer.sampler = RateSampler(sample_rate=-0.5)

def test_sample_rate_0_does_not_reset_to_1(self):
tracer = DummyTracer()
tracer._sampler = RateSampler(sample_rate=0)
tracer.sampler = RateSampler(sample_rate=0)
assert (
tracer._sampler.sample_rate == 0
tracer.sampler.sample_rate == 0
), "Setting the sample rate to zero should result in the sample rate being zero"


Expand Down Expand Up @@ -189,7 +189,7 @@ def _test_sample_rate_deviation(self):
for sample_rate in [0.1, 0.25, 0.5, 1]:
tracer = DummyTracer()
tracer.configure(sampler=RateByServiceSampler())
tracer._sampler.set_sample_rate(sample_rate)
tracer.sampler.set_sample_rate(sample_rate)

iterations = int(1e4 / sample_rate)

Expand Down

0 comments on commit 41fb9fd

Please sign in to comment.