Skip to content

Commit

Permalink
[jaeger-v2] Implement empty service name sanitizer for OTLP (#6077)
Browse files Browse the repository at this point in the history
## Which problem is this PR solving?
- Towards #5545

## Description of the changes
- Created a new `sanitizer.Func` type for sanitizing traces in OTLP format
- Implemented the empty service name sanitizer; I'll open a separate PR
for the UTF-8 sanitizer

## How was this change tested?
- Unit tests

## Checklist
- [x] I have read
https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md
- [x] I have signed all commits
- [x] I have added unit tests for the new functionality
- [x] I have run lint and test steps successfully
  - for `jaeger`: `make lint test`
  - for `jaeger-ui`: `yarn lint` and `yarn test`

---------

Signed-off-by: Mahad Zaryab <[email protected]>
  • Loading branch information
mahadzaryab1 authored Oct 12, 2024
1 parent 563f3b0 commit 4beead7
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 1 deletion.
7 changes: 6 additions & 1 deletion cmd/jaeger/internal/exporters/storageexporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,24 @@ import (
"go.uber.org/zap"

"github.com/jaegertracing/jaeger/cmd/jaeger/internal/extension/jaegerstorage"
"github.com/jaegertracing/jaeger/cmd/jaeger/internal/sanitizer"
"github.com/jaegertracing/jaeger/storage_v2/spanstore"
)

// storageExporter is the exporter that passes incoming traces through a
// sanitizer and then writes them with a spanstore.Writer.
type storageExporter struct {
// config is the exporter configuration supplied to newExporter.
config *Config
// logger is taken from the component telemetry settings in newExporter.
logger *zap.Logger
// traceWriter is the destination used by pushTraces. It is not set by
// newExporter — presumably assigned during start-up; TODO confirm.
traceWriter spanstore.Writer
// sanitizer is applied to every batch of traces before it is written.
sanitizer sanitizer.Func
}

// newExporter builds a storageExporter from the given configuration and
// telemetry settings. The exporter's sanitizer is a single chained Func that
// wraps every sanitizer returned by sanitizer.NewStandardSanitizers.
func newExporter(config *Config, otel component.TelemetrySettings) *storageExporter {
	exp := &storageExporter{
		config: config,
		logger: otel.Logger,
	}
	standard := sanitizer.NewStandardSanitizers()
	exp.sanitizer = sanitizer.NewChainedSanitizer(standard...)
	return exp
}

Expand All @@ -47,5 +52,5 @@ func (*storageExporter) close(_ context.Context) error {
}

// pushTraces applies the exporter's sanitizer to td and writes the sanitized
// traces with the trace writer. It returns whatever error WriteTraces reports.
//
// The traces must go through exp.sanitizer before being written; writing td
// directly would bypass sanitization entirely.
func (exp *storageExporter) pushTraces(ctx context.Context, td ptrace.Traces) error {
	return exp.traceWriter.WriteTraces(ctx, exp.sanitizer(td))
}
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ func TestExporter(t *testing.T) {
requiredTrace, err := spanReader.GetTrace(ctx, requiredTraceID)
require.NoError(t, err)
assert.Equal(t, spanID.String(), requiredTrace.Spans[0].SpanID.String())

// check that the service name attribute was added by the sanitizer
require.Equal(t, "missing-service-name", requiredTrace.Spans[0].Process.ServiceName)
}

func makeStorageExtension(t *testing.T, memstoreName string) component.Host {
Expand Down
41 changes: 41 additions & 0 deletions cmd/jaeger/internal/sanitizer/emptyservicename.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/ptrace"

"github.com/jaegertracing/jaeger/pkg/otelsemconv"
)

// Placeholder values that sanitizeEmptyServiceName writes into the service
// name resource attribute when the original value is unusable.
const (
// emptyServiceName replaces a service name that is present but is "".
emptyServiceName = "empty-service-name"
// serviceNameWrongType replaces a service name whose value is not a string.
serviceNameWrongType = "service-name-wrong-type"
// missingServiceName is used when the attribute is absent altogether.
missingServiceName = "missing-service-name"
)

// NewEmptyServiceNameSanitizer returns a sanitizer that makes sure every
// resource carries a non-empty string service name. Missing, empty, and
// non-string service names are each replaced with a distinct placeholder.
func NewEmptyServiceNameSanitizer() Func {
	var fn Func = sanitizeEmptyServiceName
	return fn
}

// sanitizeEmptyServiceName walks every ResourceSpans entry in traces and
// rewrites the service name resource attribute in place when it is unusable:
//   - attribute absent       -> missingServiceName
//   - attribute not a string -> serviceNameWrongType
//   - attribute is ""        -> emptyServiceName
//
// Any other value is left untouched. The same traces value is returned.
func sanitizeEmptyServiceName(traces ptrace.Traces) ptrace.Traces {
	key := string(otelsemconv.ServiceNameKey)
	all := traces.ResourceSpans()
	for idx := 0; idx < all.Len(); idx++ {
		attrs := all.At(idx).Resource().Attributes()
		value, found := attrs.Get(key)
		if !found {
			attrs.PutStr(key, missingServiceName)
			continue
		}
		if value.Type() != pcommon.ValueTypeStr {
			attrs.PutStr(key, serviceNameWrongType)
			continue
		}
		if value.Str() == "" {
			attrs.PutStr(key, emptyServiceName)
		}
	}
	return traces
}
80 changes: 80 additions & 0 deletions cmd/jaeger/internal/sanitizer/emptyservicename_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
"testing"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/pdata/ptrace"
)

// TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForStrings checks the
// sanitizer's output when the service name is absent, an empty string, or a
// regular non-empty string.
func TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForStrings(t *testing.T) {
	strPtr := func(s string) *string { return &s }
	cases := []struct {
		name     string
		input    *string // nil means the attribute is not set at all
		expected string
	}{
		{name: "no service name", expected: "missing-service-name"},
		{name: "empty service name", input: strPtr(""), expected: "empty-service-name"},
		{name: "non-empty service name", input: strPtr("hello"), expected: "hello"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			traces := ptrace.NewTraces()
			attrs := traces.ResourceSpans().AppendEmpty().Resource().Attributes()
			if tc.input != nil {
				attrs.PutStr("service.name", *tc.input)
			}

			sanitize := NewEmptyServiceNameSanitizer()
			result := sanitize(traces)

			got, found := result.ResourceSpans().At(0).Resource().Attributes().Get("service.name")
			require.True(t, found)
			require.Equal(t, tc.expected, got.Str())
		})
	}
}

// TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForNonStringType checks
// that an integer-valued service name is replaced with the wrong-type
// placeholder string.
func TestEmptyServiceNameSanitizer_SubstitutesCorrectlyForNonStringType(t *testing.T) {
	traces := ptrace.NewTraces()
	resourceAttrs := traces.ResourceSpans().AppendEmpty().Resource().Attributes()
	resourceAttrs.PutInt("service.name", 1)

	sanitized := NewEmptyServiceNameSanitizer()(traces)

	got, found := sanitized.ResourceSpans().At(0).Resource().Attributes().Get("service.name")
	require.True(t, found)
	require.Equal(t, "service-name-wrong-type", got.Str())
}
14 changes: 14 additions & 0 deletions cmd/jaeger/internal/sanitizer/package_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
"testing"

"github.com/jaegertracing/jaeger/pkg/testutils"
)

// TestMain is the entry point for this package's tests; it hands control to
// testutils.VerifyGoLeaks (presumably a goroutine-leak check wrapped around
// the test run — confirm against pkg/testutils).
func TestMain(m *testing.M) {
testutils.VerifyGoLeaks(m)
}
33 changes: 33 additions & 0 deletions cmd/jaeger/internal/sanitizer/sanitizer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
"go.opentelemetry.io/collector/pdata/ptrace"
)

// Func is a function that performs enrichment, clean-up, or normalization of trace data.
// It receives a batch of traces and returns the traces that should be used in
// its place.
type Func func(traces ptrace.Traces) ptrace.Traces

// NewStandardSanitizers returns the default set of sanitizers used by the
// storage exporter. It currently contains only the empty service name
// sanitizer.
func NewStandardSanitizers() []Func {
	standard := make([]Func, 0, 1)
	standard = append(standard, NewEmptyServiceNameSanitizer())
	return standard
}

// NewChainedSanitizer combines the given sanitizers into a single Func that
// applies them in argument order, feeding each one the output of the previous.
// When exactly one sanitizer is supplied it is returned as-is to avoid an
// extra layer of indirection. With no sanitizers the returned Func hands the
// traces back unchanged.
func NewChainedSanitizer(sanitizers ...Func) Func {
	if len(sanitizers) != 1 {
		return func(traces ptrace.Traces) ptrace.Traces {
			out := traces
			for i := range sanitizers {
				out = sanitizers[i](out)
			}
			return out
		}
	}
	return sanitizers[0]
}
57 changes: 57 additions & 0 deletions cmd/jaeger/internal/sanitizer/sanitizer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2024 The Jaeger Authors.
// SPDX-License-Identifier: Apache-2.0

package sanitizer

import (
"testing"

"github.com/stretchr/testify/require"
"go.opentelemetry.io/collector/pdata/ptrace"
)

func TestNewStandardSanitizers(t *testing.T) {
sanitizers := NewStandardSanitizers()
require.Len(t, sanitizers, 1)
}

// TestNewChainedSanitizer covers both paths of NewChainedSanitizer: a single
// sanitizer, and two sanitizers where the second overwrites what the first
// wrote.
func TestNewChainedSanitizer(t *testing.T) {
	// appendWorld adds a resource carrying hello=world.
	appendWorld := Func(func(traces ptrace.Traces) ptrace.Traces {
		traces.ResourceSpans().AppendEmpty().Resource().Attributes().PutStr("hello", "world")
		return traces
	})
	// overwriteGoodbye rewrites hello on the first resource; it relies on an
	// earlier sanitizer having created that resource.
	overwriteGoodbye := Func(func(traces ptrace.Traces) ptrace.Traces {
		traces.ResourceSpans().At(0).Resource().Attributes().PutStr("hello", "goodbye")
		return traces
	})
	// requireHello asserts the hello attribute on the first resource.
	requireHello := func(td ptrace.Traces, want string) {
		t.Helper()
		got, found := td.ResourceSpans().At(0).Resource().Attributes().Get("hello")
		require.True(t, found)
		require.Equal(t, want, got.Str())
	}

	single := NewChainedSanitizer(appendWorld)
	requireHello(single(ptrace.NewTraces()), "world")

	chained := NewChainedSanitizer(appendWorld, overwriteGoodbye)
	requireHello(chained(ptrace.NewTraces()), "goodbye")
}

0 comments on commit 4beead7

Please sign in to comment.