diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index b676afb68..d6969e479 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -121,7 +121,7 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st builder.Services .AddHttpClient("ClearML-NoRetry") .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString)); - + builder.Services.AddSingleton(); builder.Services.AddHealthChecks().AddCheck("ClearML Health Check"); return builder; diff --git a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs index 649efca04..2a9a63a8c 100644 --- a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs @@ -3,6 +3,8 @@ public class ClearMLHealthCheck : IHealthCheck private readonly HttpClient _httpClient; private readonly IOptionsMonitor _options; private readonly IClearMLAuthenticationService _clearMLAuthenticationService; + private int _numConsecutiveFailures; + private readonly AsyncLock _lock; public ClearMLHealthCheck( IClearMLAuthenticationService clearMLAuthenticationService, @@ -13,6 +15,8 @@ IOptionsMonitor options _httpClient = httpClientFactory.CreateClient("ClearML-NoRetry"); _options = options; _clearMLAuthenticationService = clearMLAuthenticationService; + _numConsecutiveFailures = 0; + _lock = new AsyncLock(); } public async Task CheckHealthAsync( @@ -28,11 +32,15 @@ public async Task CheckHealthAsync( return HealthCheckResult.Unhealthy( $"No ClearML agents are available for configured queue \"{_options.CurrentValue.Queue}\"" ); + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("ClearML is available"); } catch (Exception e) { - return HealthCheckResult.Unhealthy(exception: e); + _numConsecutiveFailures++; + return _numConsecutiveFailures > 3 + ? HealthCheckResult.Unhealthy(exception: e) + : HealthCheckResult.Degraded(exception: e); } }