From 5b9434437506ee77a5fda981e30679fd76c555b6 Mon Sep 17 00:00:00 2001 From: Steve Gordon Date: Fri, 4 Oct 2024 13:27:28 +0100 Subject: [PATCH] Improve OTel bridge compatibility with existing Azure instrumentation (#2455) This ensures that the Azure SDK instrumentation doesn't use the existing `Activity` span ID, which, when combined with the OTel bridge, can lead to spans being parented by themselves. This also enhances the logic of the OTel bridge so that we skip span creation for activities coming from the Azure SDK libraries if our Azure instrumentation assemblies are loaded. This ensures we don't create essentially repeated child spans. Fixes #2454 --- .../DiagnosticListeners/KnownListeners.cs | 8 ++-- src/Elastic.Apm/Model/Transaction.cs | 29 +++++++---- .../OpenTelemetry/ElasticActivityListener.cs | 38 ++++++++++++++- ...reMessagingServiceBusDiagnosticListener.cs | 5 +- .../ActivityIntegrationTests.cs | 48 +++++++++++++------ ...zureQueueStorageDiagnosticListenerTests.cs | 16 +++++++ .../SqlServerFixture.cs | 16 ++++++- .../AdoNet/SqlServerFixture.cs | 19 +++++++- 8 files changed, 144 insertions(+), 35 deletions(-) diff --git a/src/Elastic.Apm/DiagnosticListeners/KnownListeners.cs b/src/Elastic.Apm/DiagnosticListeners/KnownListeners.cs index 69126a476..3711ed0c7 100644 --- a/src/Elastic.Apm/DiagnosticListeners/KnownListeners.cs +++ b/src/Elastic.Apm/DiagnosticListeners/KnownListeners.cs @@ -9,18 +9,18 @@ namespace Elastic.Apm.DiagnosticListeners { internal static class KnownListeners { + // Known activity names public const string MicrosoftAspNetCoreHostingHttpRequestIn = "Microsoft.AspNetCore.Hosting.HttpRequestIn"; public const string SystemNetHttpHttpRequestOut = "System.Net.Http.HttpRequestOut"; public const string SystemNetHttpDesktopHttpRequestOut = "System.Net.Http.Desktop.HttpRequestOut"; public const string ApmTransactionActivityName = "ElasticApm.Transaction"; - - public static HashSet KnownListenersList => new() - { + public static HashSet SkippedActivityNamesSet => + [ MicrosoftAspNetCoreHostingHttpRequestIn, SystemNetHttpHttpRequestOut, SystemNetHttpDesktopHttpRequestOut, ApmTransactionActivityName - }; + ]; } } diff --git a/src/Elastic.Apm/Model/Transaction.cs b/src/Elastic.Apm/Model/Transaction.cs index 680b167e2..9280a7809 100644 --- a/src/Elastic.Apm/Model/Transaction.cs +++ b/src/Elastic.Apm/Model/Transaction.cs @@ -1,5 +1,4 @@ -// Licensed to Elasticsearch B.V under -// one or more agreements. +// Licensed to Elasticsearch B.V under one or more agreements. // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information @@ -27,6 +26,17 @@ internal class Transaction : ITransaction { internal static readonly string ApmTransactionActivityName = "ElasticApm.Transaction"; +#if NET + internal static readonly ActivitySource ElasticApmActivitySource = new("Elastic.Apm"); + + // This simply ensures our transaction activity is always created. + internal static readonly ActivityListener Listener = new() + { + ShouldListenTo = s => s.Name == ElasticApmActivitySource.Name, + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllData + }; +#endif + internal readonly TraceState _traceState; internal readonly ConcurrentDictionary SpanTimings = new(); @@ -144,7 +154,6 @@ internal Transaction( (configuration.TraceContinuationStrategy == ConfigConsts.SupportedValues.RestartExternal && (distributedTracingData?.TraceState == null || distributedTracingData is { TraceState: { SampleRate: null } })); - // For each new transaction, start an Activity if we're not ignoring them. // If Activity.Current is not null, the started activity will be a child activity, // so the traceid and tracestate of the parent will flow to it. @@ -154,7 +163,7 @@ internal Transaction( if (current != null) _activity = current; - // Otherwise we will start an activity explicitly and ensure it trace_id and trace_state respect our bookkeeping. + // Otherwise we will start an activity explicitly and ensure its trace_id and trace_state respect our bookkeeping. // Unless explicitly asked not to through `ignoreActivity`: (https://github.com/elastic/apm-agent-dotnet/issues/867#issuecomment-650170150) else if (!ignoreActivity) _activity = StartActivity(shouldRestartTrace); @@ -539,16 +548,20 @@ internal void SetOutcome(Outcome outcome) _outcome = outcome; } - private Activity StartActivity(bool shouldRestartTrace) + private static Activity StartActivity(bool shouldRestartTrace) { +#if NET + var activity = ElasticApmActivitySource.CreateActivity(KnownListeners.ApmTransactionActivityName, ActivityKind.Internal); +#else var activity = new Activity(KnownListeners.ApmTransactionActivityName); +#endif if (shouldRestartTrace) { - activity.SetParentId(ActivityTraceId.CreateRandom(), ActivitySpanId.CreateRandom(), + activity?.SetParentId(ActivityTraceId.CreateRandom(), ActivitySpanId.CreateRandom(), Activity.Current != null ? Activity.Current.ActivityTraceFlags : ActivityTraceFlags.None); } - activity.SetIdFormat(ActivityIdFormat.W3C); - activity.Start(); + activity?.SetIdFormat(ActivityIdFormat.W3C); + activity?.Start(); return activity; } diff --git a/src/Elastic.Apm/OpenTelemetry/ElasticActivityListener.cs b/src/Elastic.Apm/OpenTelemetry/ElasticActivityListener.cs index 4751834a0..0ebbe1fdc 100644 --- a/src/Elastic.Apm/OpenTelemetry/ElasticActivityListener.cs +++ b/src/Elastic.Apm/OpenTelemetry/ElasticActivityListener.cs @@ -38,6 +38,14 @@ public class ElasticActivityListener : IDisposable internal ElasticActivityListener(IApmAgent agent, HttpTraceConfiguration httpTraceConfiguration) => (_logger, _httpTraceConfiguration) = (agent.Logger?.Scoped(nameof(ElasticActivityListener)), httpTraceConfiguration); + private static readonly bool HasServiceBusInstrumentation = + AppDomain.CurrentDomain.GetAssemblies().SingleOrDefault(assembly => + assembly.GetName().Name == "Elastic.Apm.Azure.ServiceBus") != null; + + private static readonly bool HasStorageInstrumentation = + AppDomain.CurrentDomain.GetAssemblies().SingleOrDefault(assembly => + assembly.GetName().Name == "Elastic.Apm.Azure.Storage") != null; + private readonly IApmLogger _logger; private Tracer _tracer; private readonly HttpTraceConfiguration _httpTraceConfiguration; @@ -64,8 +72,34 @@ internal void Start(Tracer tracerInternal) private Action ActivityStarted => activity => { - if (KnownListeners.KnownListenersList.Contains(activity.DisplayName)) + // If the Elastic instrumentation for ServiceBus is present, we skip duplicating the instrumentation through the OTel bridge. + // Without this, we end up with some redundant spans in the trace with subtle differences. + if (HasServiceBusInstrumentation && activity.Tags.Any(kvp => + kvp.Key.Equals("az.namespace", StringComparison.Ordinal) && kvp.Value.Equals("Microsoft.ServiceBus", StringComparison.Ordinal))) + { + _logger.Debug()?.Log("ActivityStarted: name:{DisplayName} id:{ActivityId} traceId:{TraceId} skipped 'Microsoft.ServiceBus' " + + "activity because 'Elastic.Apm.Azure.ServiceBus' is present in the application.", + activity.DisplayName, activity.Id, activity.TraceId); + + return; + } + + if (HasStorageInstrumentation && activity.Tags.Any(kvp => + kvp.Key.Equals("az.namespace", StringComparison.Ordinal) && kvp.Value.Equals("Microsoft.Storage", StringComparison.Ordinal))) + { + _logger.Debug()?.Log("ActivityStarted: name:{DisplayName} id:{ActivityId} traceId:{TraceId} skipped 'Microsoft.Storage' " + + "activity because 'Elastic.Apm.Azure.Storage' is present in the application.", + activity.DisplayName, activity.Id, activity.TraceId); + return; + } + + if (KnownListeners.SkippedActivityNamesSet.Contains(activity.DisplayName)) + { + _logger.Trace()?.Log("ActivityStarted: name:{DisplayName} id:{ActivityId} traceId:{TraceId} skipped because it matched " + + "a skipped activity name defined in KnownListeners."); + return; + } _logger.Trace()?.Log("ActivityStarted: name:{DisplayName} id:{ActivityId} traceId:{TraceId}", activity.DisplayName, activity.Id, activity.TraceId); @@ -150,7 +184,7 @@ private void CreateSpanForActivity(Activity activity, long timestamp, List kv, string action) _onMessageCurrent = currentSegment switch { - Span span => span.StartSpanInternal(name, ApiConstants.TypeMessaging, ServiceBus.SubType, action.ToLowerInvariant(), - id: activity.SpanId.ToString()), + Span span => span.StartSpanInternal(name, ApiConstants.TypeMessaging, ServiceBus.SubType, action.ToLowerInvariant()), Transaction transaction => transaction.StartSpanInternal(name, ApiConstants.TypeMessaging, ServiceBus.SubType, - action.ToLowerInvariant(), id: activity.SpanId.ToString()), + action.ToLowerInvariant()), _ => _onMessageCurrent }; } diff --git a/test/Elastic.Apm.Tests/ActivityIntegrationTests.cs b/test/Elastic.Apm.Tests/ActivityIntegrationTests.cs index 86870c850..fe232020e 100644 --- a/test/Elastic.Apm.Tests/ActivityIntegrationTests.cs +++ b/test/Elastic.Apm.Tests/ActivityIntegrationTests.cs @@ -2,7 +2,6 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information -using System; using System.Diagnostics; using System.Linq; using System.Threading; @@ -11,7 +10,6 @@ using Elastic.Apm.Tests.Utilities.XUnit; using FluentAssertions; using Xunit; -using Xunit.Abstractions; namespace Elastic.Apm.Tests { @@ -26,21 +24,42 @@ public class ActivityIntegrationTests [Fact] public void ElasticTransactionReusesTraceIdFromCurrentActivity() { - Activity.DefaultIdFormat = ActivityIdFormat.W3C; +#if NET + var listener = new ActivityListener + { + ShouldListenTo = a => a.Name == "Elastic.Apm", + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllData, + ActivityStarted = activity => { }, + ActivityStopped = activity => { } + }; - var activity = new Activity("UnitTestActivity"); - activity.Start(); + ActivitySource.AddActivityListener(listener); +#endif - Activity.Current.TraceId.Should().Be(activity.TraceId); + try + { + Activity.DefaultIdFormat = ActivityIdFormat.W3C; - var payloadSender = new MockPayloadSender(); - using (var agent = new ApmAgent(new TestAgentComponents(payloadSender: payloadSender))) - agent.Tracer.CaptureTransaction("TestTransaction", "Test", () => Thread.Sleep(10)); + var activity = new Activity("UnitTestActivity"); + activity.Start(); - payloadSender.FirstTransaction.TraceId.Should().Be(activity.TraceId.ToString()); - payloadSender.FirstTransaction.ParentId.Should().BeNullOrEmpty(); + Activity.Current.TraceId.Should().Be(activity.TraceId); - activity.Stop(); + var payloadSender = new MockPayloadSender(); + using (var agent = new ApmAgent(new TestAgentComponents(payloadSender: payloadSender))) + agent.Tracer.CaptureTransaction("TestTransaction", "Test", () => Thread.Sleep(10)); + + payloadSender.FirstTransaction.TraceId.Should().Be(activity.TraceId.ToString()); + payloadSender.FirstTransaction.ParentId.Should().BeNullOrEmpty(); + + activity.Stop(); + } + catch + { +#if NET + listener.Dispose(); +#endif + } } /// @@ -147,7 +166,8 @@ public void MultipleTransactionInOneActivity() payloadSender.Transactions[0].Id.Should().NotBe(payloadSender.Transactions[1].Id); activity.Stop(); } -#if NET5_0_OR_GREATER + +#if NET /// /// Makes sure that transactions on the same Activity are part of the same trace. /// @@ -188,8 +208,6 @@ public async Task ActivityRespectsSampling() var sampledSpans = payloadSender.Spans.Where(t => t.IsSampled).ToArray(); sampledSpans.Length.Should().Be(sampled.Length); - - } #endif } diff --git a/test/azure/Elastic.Apm.Azure.Storage.Tests/AzureQueueStorageDiagnosticListenerTests.cs b/test/azure/Elastic.Apm.Azure.Storage.Tests/AzureQueueStorageDiagnosticListenerTests.cs index d7d9510f7..c4ce7a416 100644 --- a/test/azure/Elastic.Apm.Azure.Storage.Tests/AzureQueueStorageDiagnosticListenerTests.cs +++ b/test/azure/Elastic.Apm.Azure.Storage.Tests/AzureQueueStorageDiagnosticListenerTests.cs @@ -1,4 +1,5 @@ using System; +using System.Diagnostics; using System.Threading.Tasks; using Azure.Storage.Queues; using Elastic.Apm.Api; @@ -21,6 +22,21 @@ public class AzureQueueStorageDiagnosticListenerTests : IDisposable private readonly IDisposable _subscription; private readonly ITestOutputHelper _output; +#if NET + static AzureQueueStorageDiagnosticListenerTests() + { + var listener = new ActivityListener + { + ShouldListenTo = a => a.Name == "Elastic.Apm", + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllData, + ActivityStarted = activity => { }, + ActivityStopped = activity => { } + }; + + ActivitySource.AddActivityListener(listener); + } +#endif + public AzureQueueStorageDiagnosticListenerTests(AzureStorageTestEnvironment environment, ITestOutputHelper output) { var logger = new XUnitLogger(LogLevel.Trace, output); diff --git a/test/instrumentations/Elastic.Apm.SqlClient.Tests/SqlServerFixture.cs b/test/instrumentations/Elastic.Apm.SqlClient.Tests/SqlServerFixture.cs index 2095543c0..1563fe387 100644 --- a/test/instrumentations/Elastic.Apm.SqlClient.Tests/SqlServerFixture.cs +++ b/test/instrumentations/Elastic.Apm.SqlClient.Tests/SqlServerFixture.cs @@ -3,6 +3,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Runtime.InteropServices; using System.Threading.Tasks; using Testcontainers.MsSql; using Xunit; @@ -22,8 +23,19 @@ public sealed class SqlServerFixture : IAsyncLifetime public SqlServerFixture(IMessageSink sink) { _sink = sink; - _container = new MsSqlBuilder() - .Build(); + + // see: https://blog.rufer.be/2024/09/22/workaround-fix-testcontainers-sql-error-docker-dotnet-dockerapiexception-docker-api-responded-with-status-codeconflict/ + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + _container = new MsSqlBuilder() + .WithImage("mcr.microsoft.com/mssql/server:2022-latest") + .Build(); + } + else + { + _container = new MsSqlBuilder() + .Build(); + } } public async Task InitializeAsync() diff --git a/test/profiler/Elastic.Apm.Profiler.Managed.Tests/AdoNet/SqlServerFixture.cs b/test/profiler/Elastic.Apm.Profiler.Managed.Tests/AdoNet/SqlServerFixture.cs index b3d7b040c..3f25f8dcf 100644 --- a/test/profiler/Elastic.Apm.Profiler.Managed.Tests/AdoNet/SqlServerFixture.cs +++ b/test/profiler/Elastic.Apm.Profiler.Managed.Tests/AdoNet/SqlServerFixture.cs @@ -3,6 +3,7 @@ // Elasticsearch B.V licenses this file to you under the Apache 2.0 License. // See the LICENSE file in the project root for more information +using System.Runtime.InteropServices; using System.Threading.Tasks; using Testcontainers.MsSql; using Xunit; @@ -14,7 +15,23 @@ public sealed class SqlServerCollection : ICollectionFixture { public sealed class SqlServerFixture : IAsyncLifetime { - private readonly MsSqlContainer _container = new MsSqlBuilder().Build(); + private readonly MsSqlContainer _container; + + public SqlServerFixture() + { + // see: https://blog.rufer.be/2024/09/22/workaround-fix-testcontainers-sql-error-docker-dotnet-dockerapiexception-docker-api-responded-with-status-codeconflict/ + if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + _container = new MsSqlBuilder() + .WithImage("mcr.microsoft.com/mssql/server:2022-latest") + .Build(); + } + else + { + _container = new MsSqlBuilder() + .Build(); + } + } public string ConnectionString => _container.GetConnectionString();