From 2f3f0529cdf2ec4b01c40ad933e5a5bf31e7dfce Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 10 Nov 2023 12:40:42 -0500 Subject: [PATCH 1/2] Fixes #133 --- .../Configuration/IMachineBuilderExtensions.cs | 2 +- .../Services/ClearMLHealthCheck.cs | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index b676afb68..d6969e479 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -121,7 +121,7 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st builder.Services .AddHttpClient("ClearML-NoRetry") .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString)); - + builder.Services.AddSingleton(); builder.Services.AddHealthChecks().AddCheck("ClearML Health Check"); return builder; diff --git a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs index 649efca04..2a9a63a8c 100644 --- a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs @@ -3,6 +3,8 @@ public class ClearMLHealthCheck : IHealthCheck private readonly HttpClient _httpClient; private readonly IOptionsMonitor _options; private readonly IClearMLAuthenticationService _clearMLAuthenticationService; + private int _numConsecutiveFailures; + private readonly AsyncLock _lock; public ClearMLHealthCheck( IClearMLAuthenticationService clearMLAuthenticationService, @@ -13,6 +15,8 @@ IOptionsMonitor options _httpClient = httpClientFactory.CreateClient("ClearML-NoRetry"); _options = options; _clearMLAuthenticationService = clearMLAuthenticationService; + _numConsecutiveFailures = 0; + _lock = new AsyncLock(); } public async Task CheckHealthAsync( @@ -28,11 +32,15 @@ public async Task CheckHealthAsync( return HealthCheckResult.Unhealthy( $"No ClearML agents are available for configured queue \"{_options.CurrentValue.Queue}\"" ); + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("ClearML is available"); } catch (Exception e) { - return HealthCheckResult.Unhealthy(exception: e); + _numConsecutiveFailures++; + return _numConsecutiveFailures > 3 + ? HealthCheckResult.Unhealthy(exception: e) + : HealthCheckResult.Degraded(exception: e); } } From 99891205b6bbd829a793302ec7554b9e1ef4bf98 Mon Sep 17 00:00:00 2001 From: Enkidu93 Date: Fri, 10 Nov 2023 13:20:23 -0500 Subject: [PATCH 2/2] Use locks (also fix locking in S3) --- .../Services/ClearMLHealthCheck.cs | 14 +++++++++----- .../Services/S3HealthCheck.cs | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs index 2a9a63a8c..d7458214d 100644 --- a/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/ClearMLHealthCheck.cs @@ -32,15 +32,19 @@ public async Task CheckHealthAsync( return HealthCheckResult.Unhealthy( $"No ClearML agents are available for configured queue \"{_options.CurrentValue.Queue}\"" ); - _numConsecutiveFailures = 0; + using (await _lock.LockAsync()) + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("ClearML is available"); } catch (Exception e) { - _numConsecutiveFailures++; - return _numConsecutiveFailures > 3 - ? HealthCheckResult.Unhealthy(exception: e) - : HealthCheckResult.Degraded(exception: e); + using (await _lock.LockAsync()) + { + _numConsecutiveFailures++; + return _numConsecutiveFailures > 3 + ? HealthCheckResult.Unhealthy(exception: e) + : HealthCheckResult.Degraded(exception: e); + } } } diff --git a/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs index aa30bf047..8275c61a4 100644 --- a/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/S3HealthCheck.cs @@ -37,7 +37,8 @@ public async Task CheckHealthAsync( } ) ).ListObjectsV2Async(request, cancellationToken); - _numConsecutiveFailures = 0; + using (await _lock.LockAsync()) + _numConsecutiveFailures = 0; return HealthCheckResult.Healthy("The S3 bucket is available"); } catch (Exception e)