From 9dd74a680cb096b93d8ddf1a628f13d461eb155b Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 7 Feb 2024 13:06:42 -0500 Subject: [PATCH] Presigned URL code cleaning script - Make it run once a day --- .../IMachineBuilderExtensions.cs | 36 ++++--- .../Models/ModelDownloadUrl.cs | 11 ++ .../Models/TranslationEngine.cs | 1 + .../Services/HangfireHealthCheck.cs | 12 +-- .../Services/IFileStorage.cs | 2 + .../Services/ISharedFileService.cs | 8 ++ .../Services/ITranslationEngineService.cs | 3 + .../Services/InMemoryStorage.cs | 9 ++ .../Services/LocalStorage.cs | 9 ++ .../Services/ModelCleanupService.cs | 100 ++++++++++++++++++ .../Services/NmtClearMLBuildJobFactory.cs | 31 +++--- .../Services/NmtEngineService.cs | 56 +++++++++- .../Services/NmtTrainBuildJob.cs | 7 ++ .../Services/S3FileStorage.cs | 22 ++++ .../ServalTranslationEngineServiceV1.cs | 26 +++++ .../Services/SharedFileService.cs | 14 +++ .../Services/SmtTransferEngineService.cs | 60 +++++------ src/SIL.Machine.AspNetCore/Usings.cs | 2 + .../Program.cs | 3 + .../Services/ModelCleanupServiceTests.cs | 93 ++++++++++++++++ .../Services/NmtEngineServiceTests.cs | 3 +- tests/SIL.Machine.AspNetCore.Tests/Usings.cs | 1 + 22 files changed, 433 insertions(+), 76 deletions(-) create mode 100644 src/SIL.Machine.AspNetCore/Models/ModelDownloadUrl.cs create mode 100644 src/SIL.Machine.AspNetCore/Services/ModelCleanupService.cs create mode 100644 tests/SIL.Machine.AspNetCore.Tests/Services/ModelCleanupServiceTests.cs diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index d1fe54c17..bad6a2113 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -151,6 +151,21 @@ private static IMachineBuilder AddHangfireBuildJobRunner(this IMachineBuilder bu return builder; } + private static MongoStorageOptions GetMongoStorageOptions() + { + var mongoStorageOptions = new MongoStorageOptions + { + MigrationOptions = new MongoMigrationOptions + { + MigrationStrategy = new MigrateMongoMigrationStrategy(), + BackupStrategy = new CollectionMongoBackupStrategy() + }, + CheckConnection = true, + CheckQueuedJobsStrategy = CheckQueuedJobsStrategy.TailNotificationsCollection, + }; + return mongoStorageOptions; + } + public static IMachineBuilder AddMongoHangfireJobClient( this IMachineBuilder builder, string? connectionString = null @@ -164,19 +179,7 @@ public static IMachineBuilder AddMongoHangfireJobClient( c.SetDataCompatibilityLevel(CompatibilityLevel.Version_170) .UseSimpleAssemblyNameTypeSerializer() .UseRecommendedSerializerSettings() - .UseMongoStorage( - connectionString, - new MongoStorageOptions - { - MigrationOptions = new MongoMigrationOptions - { - MigrationStrategy = new MigrateMongoMigrationStrategy(), - BackupStrategy = new CollectionMongoBackupStrategy() - }, - CheckConnection = true, - CheckQueuedJobsStrategy = CheckQueuedJobsStrategy.TailNotificationsCollection, - } - ) + .UseMongoStorage(connectionString, GetMongoStorageOptions()) .UseFilter(new AutomaticRetryAttribute { Attempts = 0 }) ); builder.Services.AddHealthChecks().AddCheck(name: "Hangfire"); @@ -402,6 +405,13 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) return builder; } + public static IMachineBuilder AddModelCleanupService(this IMachineBuilder builder) + { + builder.Services.AddSingleton(); + builder.Services.AddHostedService(p => p.GetRequiredService()); + return builder; + } + private static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, BuildJobOptions options) { builder.Services.AddScoped(); diff --git a/src/SIL.Machine.AspNetCore/Models/ModelDownloadUrl.cs b/src/SIL.Machine.AspNetCore/Models/ModelDownloadUrl.cs new file mode 100644 index 000000000..f48d73014 --- /dev/null +++ b/src/SIL.Machine.AspNetCore/Models/ModelDownloadUrl.cs @@ -0,0 +1,11 @@ +using System; + +namespace SIL.Machine.AspNetCore.Models +{ + public class ModelDownloadUrl + { + public string Url { get; set; } = default!; + public int ModelRevision { get; set; } = default!; + public DateTime ExipiresAt { get; set; } = default!; + } +} diff --git a/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs b/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs index ffc639fc7..0c982f34a 100644 --- a/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs +++ b/src/SIL.Machine.AspNetCore/Models/TranslationEngine.cs @@ -7,6 +7,7 @@ public class TranslationEngine : IEntity public string EngineId { get; set; } = default!; public string SourceLanguage { get; set; } = default!; public string TargetLanguage { get; set; } = default!; + public bool IsModelPersisted { get; set; } = false; public int BuildRevision { get; set; } public Build? CurrentBuild { get; set; } } diff --git a/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs b/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs index 73bffff36..1d011c578 100644 --- a/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs +++ b/src/SIL.Machine.AspNetCore/Services/HangfireHealthCheck.cs @@ -1,15 +1,9 @@ namespace SIL.Machine.AspNetCore.Services; -public class HangfireHealthCheck : IHealthCheck +public class HangfireHealthCheck(JobStorage jobStorage, IOptions options) : IHealthCheck { - private readonly JobStorage _jobStorage; - private readonly IOptions _options; - - public HangfireHealthCheck(JobStorage jobStorage, IOptions options) - { - _jobStorage = jobStorage; - _options = options; - } + private readonly JobStorage _jobStorage = jobStorage; + private readonly IOptions _options = options; public Task CheckHealthAsync( HealthCheckContext context, diff --git a/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs b/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs index 89a15ccc9..3417cffae 100644 --- a/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/IFileStorage.cs @@ -14,5 +14,7 @@ Task> ListFilesAsync( Task OpenWriteAsync(string path, CancellationToken cancellationToken = default); + Task GetDownloadUrlAsync(string path, DateTime expiresAt, CancellationToken cancellationToken = default); + Task DeleteAsync(string path, bool recurse = false, CancellationToken cancellationToken = default); } diff --git a/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs b/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs index acbac0687..f082a79c2 100644 --- a/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs @@ -6,6 +6,14 @@ public interface ISharedFileService Uri GetResolvedUri(string path); + Task GetDownloadUrlAsync(string path, DateTime expiresAt); + + Task> ListFilesAsync( + string path, + bool recurse = false, + CancellationToken cancellationToken = default + ); + Task OpenReadAsync(string path, CancellationToken cancellationToken = default); Task OpenWriteAsync(string path, CancellationToken cancellationToken = default); diff --git a/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs b/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs index 2fea94301..33193382f 100644 --- a/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ITranslationEngineService.cs @@ -9,6 +9,7 @@ Task CreateAsync( string? engineName, string sourceLanguage, string targetLanguage, + bool isModelPersisted = false, CancellationToken cancellationToken = default ); Task DeleteAsync(string engineId, CancellationToken cancellationToken = default); @@ -40,6 +41,8 @@ Task StartBuildAsync( Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default); + Task GetModelDownloadUrlAsync(string engineId, CancellationToken cancellationToken = default); + Task GetQueueSizeAsync(CancellationToken cancellationToken = default); bool IsLanguageNativeToModel(string language, out string internalCode); diff --git a/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs b/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs index 76755f245..e92109a39 100644 --- a/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs @@ -96,6 +96,15 @@ public Task> ListFilesAsync( ); } + public Task GetDownloadUrlAsync( + string path, + DateTime expiresAt, + CancellationToken cancellationToken = default + ) + { + throw new NotSupportedException(); + } + public Task OpenReadAsync(string path, CancellationToken cancellationToken = default) { if (!_memoryStreams.TryGetValue(Normalize(path), out Entry? ret)) diff --git a/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs b/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs index 6826869ee..9fc26c097 100644 --- a/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/LocalStorage.cs @@ -36,6 +36,15 @@ public Task> ListFilesAsync( ); } + public Task GetDownloadUrlAsync( + string path, + DateTime expiresAt, + CancellationToken cancellationToken = default + ) + { + throw new NotSupportedException(); + } + public Task OpenReadAsync(string path, CancellationToken cancellationToken = default) { Uri pathUri = new(_basePath, Normalize(path)); diff --git a/src/SIL.Machine.AspNetCore/Services/ModelCleanupService.cs b/src/SIL.Machine.AspNetCore/Services/ModelCleanupService.cs new file mode 100644 index 000000000..c666624d0 --- /dev/null +++ b/src/SIL.Machine.AspNetCore/Services/ModelCleanupService.cs @@ -0,0 +1,100 @@ +namespace SIL.Machine.AspNetCore.Services; + +public class ModelCleanupService( + IServiceProvider services, + ISharedFileService sharedFileService, + IRepository engines, + ILogger logger +) : RecurrentTask("Model Cleanup Service", services, RefreshPeriod, logger) +{ + private ISharedFileService SharedFileService { get; } = sharedFileService; + private ILogger _logger = logger; + private IRepository _engines = engines; + private List _filesPreviouslyMarkedForDeletion = []; + private readonly List _filesNewlyMarkedForDeletion = []; + private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(10); + + protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken) + { + await CheckModelsAsync(cancellationToken); + } + + private async Task CheckModelsAsync(CancellationToken cancellationToken) + { + _logger.LogInformation("Running model cleanup job"); + IReadOnlyCollection paths = await SharedFileService.ListFilesAsync( + NmtEngineService.ModelDirectory, + cancellationToken: cancellationToken + ); + // Get all engine ids from the database + Dictionary engineIdsToRevision = _engines + .GetAllAsync(cancellationToken: cancellationToken) + .Result.Select(e => (e.EngineId, e.BuildRevision)) + .ToDictionary(); + + foreach (string path in paths) + { + string filename = Path.GetFileName(path); + string filenameWithoutExtensions = filename.Split(".")[0]; + string extension = filename[^(filename.Length - filenameWithoutExtensions.Length)..]; + if (extension != ".tar.gz") + { + await DeleteFileAsync( + path, + $"filename has to have .tar.gz extension, instead has {extension}", + cancellationToken + ); + continue; + } + string[] parts = filenameWithoutExtensions.Split("_"); + if (parts.Length != 2) + { + await DeleteFileAsync( + path, + $"filename has to have one underscore, instead has {parts.Length - 1}", + cancellationToken + ); + continue; + } + string engineId = parts[0]; + if (!engineIdsToRevision.ContainsKey(engineId)) + { + await DeleteFileAsync(path, $"engine {engineId} does not exist in the database.", cancellationToken); + continue; + } + if (!int.TryParse(parts[1], out int parsedBuildRevision)) + { + await DeleteFileAsync( + path, + $"cannot parse build revision from {parts[1]} for engine {engineId}", + cancellationToken + ); + continue; + } + if (engineIdsToRevision[engineId] > parsedBuildRevision) + await DeleteFileAsync( + path, + $"build revision {parsedBuildRevision} is older than the current build revision {engineIdsToRevision[engineId]}", + cancellationToken + ); + } + // roll over the list of files previously marked for deletion + _filesPreviouslyMarkedForDeletion = new List(_filesNewlyMarkedForDeletion); + _filesNewlyMarkedForDeletion.Clear(); + } + + private async Task DeleteFileAsync(string filename, string message, CancellationToken cancellationToken = default) + { + // If a file has been requested to be deleted twice, delete it. Otherwise, mark it for deletion. + if (_filesPreviouslyMarkedForDeletion.Contains(filename)) + { + _logger.LogInformation("Deleting old model file {filename}: {message}", filename, message); + await SharedFileService.DeleteAsync(filename, cancellationToken); + } + else + { + _logger.LogInformation("Marking old model file {filename} for deletion: {message}", filename, message); + _filesNewlyMarkedForDeletion.Add(filename); + } + } +} diff --git a/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs b/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs index 927310e72..dfc8423ea 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtClearMLBuildJobFactory.cs @@ -1,24 +1,16 @@ namespace SIL.Machine.AspNetCore.Services; -public class NmtClearMLBuildJobFactory : IClearMLBuildJobFactory +public class NmtClearMLBuildJobFactory( + ISharedFileService sharedFileService, + ILanguageTagService languageTagService, + IRepository engines, + IOptionsMonitor options +) : IClearMLBuildJobFactory { - private readonly ISharedFileService _sharedFileService; - private readonly ILanguageTagService _languageTagService; - private readonly IRepository _engines; - private readonly IOptionsMonitor _options; - - public NmtClearMLBuildJobFactory( - ISharedFileService sharedFileService, - ILanguageTagService languageTagService, - IRepository engines, - IOptionsMonitor options - ) - { - _sharedFileService = sharedFileService; - _languageTagService = languageTagService; - _engines = engines; - _options = options; - } + private readonly ISharedFileService _sharedFileService = sharedFileService; + private readonly ILanguageTagService _languageTagService = languageTagService; + private readonly IRepository _engines = engines; + private readonly IOptionsMonitor _options = options; public TranslationEngineType EngineType => TranslationEngineType.Nmt; @@ -52,6 +44,9 @@ public async Task CreateJobScriptAsync( + $" 'shared_file_uri': '{baseUri}',\n" + $" 'shared_file_folder': '{folder}',\n" + (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "") + // buildRevision + 1 because the build revision is incremented after the build job + // is finished successfully but the file should be saved with the new revision number + + (engine.IsModelPersisted ? $" 'save_model': '{engineId}_{engine.BuildRevision + 1}',\n" : $"") + $" 'clearml': True\n" + "}\n" + "run(args)\n"; diff --git a/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs b/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs index 8ca475e74..e3e2a7507 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs @@ -14,7 +14,8 @@ public class NmtEngineService( IRepository engines, IBuildJobService buildJobService, ILanguageTagService languageTagService, - ClearMLMonitorService clearMLMonitorService + ClearMLMonitorService clearMLMonitorService, + ISharedFileService sharedFileService ) : ITranslationEngineService { private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; @@ -22,16 +23,22 @@ ClearMLMonitorService clearMLMonitorService private readonly IDataAccessContext _dataAccessContext = dataAccessContext; private readonly IRepository _engines = engines; private readonly IBuildJobService _buildJobService = buildJobService; - private readonly ILanguageTagService _languageTagService = languageTagService; private readonly ClearMLMonitorService _clearMLMonitorService = clearMLMonitorService; + private readonly ILanguageTagService _languageTagService = languageTagService; + private readonly ISharedFileService _sharedFileService = sharedFileService; + + public const string ModelDirectory = "models/"; public TranslationEngineType Type => TranslationEngineType.Nmt; + private const int MinutesToExpire = 60; + public async Task CreateAsync( string engineId, string? engineName, string sourceLanguage, string targetLanguage, + bool isModelPersisted = false, CancellationToken cancellationToken = default ) { @@ -41,12 +48,13 @@ await _engines.InsertAsync( { EngineId = engineId, SourceLanguage = sourceLanguage, - TargetLanguage = targetLanguage + TargetLanguage = targetLanguage, + IsModelPersisted = isModelPersisted }, cancellationToken ); await _buildJobService.CreateEngineAsync( - new[] { BuildJobType.Cpu, BuildJobType.Gpu }, + [BuildJobType.Cpu, BuildJobType.Gpu], engineId, engineName, cancellationToken @@ -109,6 +117,38 @@ public async Task CancelBuildAsync(string engineId, CancellationToken cancellati } } + public async Task GetModelDownloadUrlAsync( + string engineId, + CancellationToken cancellationToken = default + ) + { + TranslationEngine engine = await GetEngineAsync(engineId, cancellationToken); + if (!engine.IsModelPersisted) + throw new NotSupportedException( + "The model cannot be downloaded. " + + "To enable downloading the model, recreate the engine with IsModelPersisted property to true." + ); + if (engine.BuildRevision == 0) + throw new InvalidOperationException("The engine has not been built yet."); + string filename = $"{engineId}_{engine.BuildRevision}.tar.gz"; + bool fileExists = await _sharedFileService.ExistsAsync( + NmtEngineService.ModelDirectory + filename, + cancellationToken + ); + if (!fileExists) + throw new FileNotFoundException( + $"The model should exist to be downloaded but is not there for BuildRevision {engine.BuildRevision}." + ); + var expiresAt = DateTime.UtcNow.AddMinutes(MinutesToExpire); + var modelInfo = new ModelDownloadUrl + { + Url = await _sharedFileService.GetDownloadUrlAsync(NmtEngineService.ModelDirectory + filename, expiresAt), + ModelRevision = engine.BuildRevision, + ExipiresAt = expiresAt + }; + return modelInfo; + } + public Task> TranslateAsync( string engineId, int n, @@ -159,4 +199,12 @@ private async Task CancelBuildJobAsync(string engineId, CancellationToken await _platformService.BuildCanceledAsync(buildId, CancellationToken.None); return buildId is not null; } + + private async Task GetEngineAsync(string engineId, CancellationToken cancellationToken) + { + TranslationEngine? engine = await _engines.GetAsync(e => e.EngineId == engineId, cancellationToken); + if (engine is null) + throw new InvalidOperationException($"The engine {engineId} does not exist."); + return engine; + } } diff --git a/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs b/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs index ad6d60612..8ed1020d0 100644 --- a/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs +++ b/src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs @@ -68,6 +68,13 @@ await PipInstallModuleAsync( + $" 'trg_lang': '{ConvertLanguageTag(engine.TargetLanguage)}',\n" + $" 'shared_file_uri': '{_sharedFileService.GetBaseUri()}',\n" + (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "") + // buildRevision + 1 because the build revision is incremented after the build job + // is finished successfully but the file should be saved with the new revision number + + ( + engine.IsModelPersisted + ? $" 'save_model': '{engine.Id}_{engine.BuildRevision + 1}',\n" + : "" + ) + $" 'clearml': False\n" + "}\n" + "run(args)\n" diff --git a/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs b/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs index 3df6c67c1..176831406 100644 --- a/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs +++ b/src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs @@ -65,6 +65,28 @@ public async Task> ListFilesAsync( return response.S3Objects.Select(s3Obj => s3Obj.Key[_basePath.Length..]).ToList(); } + public Task GetDownloadUrlAsync( + string path, + DateTime expiresAt, + CancellationToken cancellationToken = default + ) + { + return Task.FromResult( + _client.GetPreSignedURL( + new GetPreSignedUrlRequest + { + BucketName = _bucketName, + Key = _basePath + Normalize(path), + Expires = expiresAt, + ResponseHeaderOverrides = new ResponseHeaderOverrides + { + ContentDisposition = new ContentDisposition() { FileName = Path.GetFileName(path) }.ToString() + } + } + ) + ); + } + public async Task OpenReadAsync(string path, CancellationToken cancellationToken = default) { GetObjectRequest request = new() { BucketName = _bucketName, Key = _basePath + Normalize(path) }; diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index 0ea6542a6..b02db1dc8 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -23,6 +23,7 @@ await engineService.CreateAsync( request.HasEngineName ? request.EngineName : null, request.SourceLanguage, request.TargetLanguage, + request.IsModelPersisted, context.CancellationToken ); return Empty; @@ -126,6 +127,31 @@ public override async Task CancelBuild(CancelBuildRequest request, Server return Empty; } + public override async Task GetModelDownloadUrl( + GetModelDownloadUrlRequest request, + ServerCallContext context + ) + { + try + { + ITranslationEngineService engineService = GetEngineService(request.EngineType); + ModelDownloadUrl modelDownloadUrl = await engineService.GetModelDownloadUrlAsync( + request.EngineId, + context.CancellationToken + ); + return new GetModelDownloadUrlResponse + { + Url = modelDownloadUrl.Url, + ModelRevision = modelDownloadUrl.ModelRevision, + ExpiresAt = modelDownloadUrl.ExipiresAt.ToTimestamp() + }; + } + catch (InvalidOperationException e) + { + throw new RpcException(new Status(StatusCode.Aborted, e.Message)); + } + } + public override async Task GetQueueSize( GetQueueSizeRequest request, ServerCallContext context diff --git a/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs b/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs index db349dbaa..b4244211e 100644 --- a/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs +++ b/src/SIL.Machine.AspNetCore/Services/SharedFileService.cs @@ -55,6 +55,20 @@ public Uri GetResolvedUri(string path) return new Uri(_baseUri, path); } + public async Task GetDownloadUrlAsync(string path, DateTime expiresAt) + { + return await _fileStorage.GetDownloadUrlAsync(path, expiresAt); + } + + public Task> ListFilesAsync( + string path, + bool recurse = false, + CancellationToken cancellationToken = default + ) + { + return _fileStorage.ListFilesAsync(path, recurse, cancellationToken); + } + public Task OpenReadAsync(string path, CancellationToken cancellationToken = default) { return _fileStorage.OpenReadAsync(path, cancellationToken); diff --git a/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs b/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs index d7d03d9ff..db7c0434e 100644 --- a/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs +++ b/src/SIL.Machine.AspNetCore/Services/SmtTransferEngineService.cs @@ -5,37 +5,25 @@ public static class SmtTransferBuildStages public const string Train = "train"; } -public class SmtTransferEngineService : ITranslationEngineService +public class SmtTransferEngineService( + IDistributedReaderWriterLockFactory lockFactory, + IPlatformService platformService, + IDataAccessContext dataAccessContext, + IRepository engines, + IRepository trainSegmentPairs, + SmtTransferEngineStateService stateService, + IBuildJobService buildJobService, + JobStorage jobStorage +) : ITranslationEngineService { - private readonly IDistributedReaderWriterLockFactory _lockFactory; - private readonly IPlatformService _platformService; - private readonly IDataAccessContext _dataAccessContext; - private readonly IRepository _engines; - private readonly IRepository _trainSegmentPairs; - private readonly SmtTransferEngineStateService _stateService; - private readonly IBuildJobService _buildJobService; - private readonly JobStorage _jobStorage; - - public SmtTransferEngineService( - IDistributedReaderWriterLockFactory lockFactory, - IPlatformService platformService, - IDataAccessContext dataAccessContext, - IRepository engines, - IRepository trainSegmentPairs, - SmtTransferEngineStateService stateService, - IBuildJobService buildJobService, - JobStorage jobStorage - ) - { - _lockFactory = lockFactory; - _platformService = platformService; - _dataAccessContext = dataAccessContext; - _engines = engines; - _trainSegmentPairs = trainSegmentPairs; - _stateService = stateService; - _buildJobService = buildJobService; - _jobStorage = jobStorage; - } + private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory; + private readonly IPlatformService _platformService = platformService; + private readonly IDataAccessContext _dataAccessContext = dataAccessContext; + private readonly IRepository _engines = engines; + private readonly IRepository _trainSegmentPairs = trainSegmentPairs; + private readonly SmtTransferEngineStateService _stateService = stateService; + private readonly IBuildJobService _buildJobService = buildJobService; + private readonly JobStorage _jobStorage = jobStorage; public TranslationEngineType Type => TranslationEngineType.SmtTransfer; @@ -44,6 +32,7 @@ public async Task CreateAsync( string? engineName, string sourceLanguage, string targetLanguage, + bool isModelPersisted = true, CancellationToken cancellationToken = default ) { @@ -53,7 +42,8 @@ await _engines.InsertAsync( { EngineId = engineId, SourceLanguage = sourceLanguage, - TargetLanguage = targetLanguage + TargetLanguage = targetLanguage, + IsModelPersisted = true // SMT transfer engines are always persisted }, cancellationToken ); @@ -227,6 +217,14 @@ private async Task CancelBuildJobAsync(string engineId, CancellationToken return buildId is not null; } + public Task GetModelDownloadUrlAsync( + string engineId, + CancellationToken cancellationToken = default + ) + { + throw new NotSupportedException(); + } + private async Task GetEngineAsync(string engineId, CancellationToken cancellationToken) { TranslationEngine? engine = await _engines.GetAsync(e => e.EngineId == engineId, cancellationToken); diff --git a/src/SIL.Machine.AspNetCore/Usings.cs b/src/SIL.Machine.AspNetCore/Usings.cs index bd130ee4a..6757c8c25 100644 --- a/src/SIL.Machine.AspNetCore/Usings.cs +++ b/src/SIL.Machine.AspNetCore/Usings.cs @@ -1,9 +1,11 @@ global using System.Collections.Concurrent; global using System.Diagnostics; global using System.Diagnostics.CodeAnalysis; +global using System.Globalization; global using System.IO.Compression; global using System.Linq.Expressions; global using System.Net; +global using System.Net.Mime; global using System.Reflection; global using System.Runtime.CompilerServices; global using System.Security.Cryptography; diff --git a/src/SIL.Machine.Serval.EngineServer/Program.cs b/src/SIL.Machine.Serval.EngineServer/Program.cs index 863598c86..5140b15a3 100644 --- a/src/SIL.Machine.Serval.EngineServer/Program.cs +++ b/src/SIL.Machine.Serval.EngineServer/Program.cs @@ -1,5 +1,6 @@ using Hangfire; using OpenTelemetry.Trace; +using SIL.Machine.AspNetCore.Services; var builder = WebApplication.CreateBuilder(args); @@ -10,7 +11,9 @@ .AddMongoHangfireJobClient() .AddServalTranslationEngineService() .AddBuildJobService() + .AddModelCleanupService() .AddClearMLService(); + if (builder.Environment.IsDevelopment()) builder .Services.AddOpenTelemetry() diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/ModelCleanupServiceTests.cs b/tests/SIL.Machine.AspNetCore.Tests/Services/ModelCleanupServiceTests.cs new file mode 100644 index 000000000..6bee1df38 --- /dev/null +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/ModelCleanupServiceTests.cs @@ -0,0 +1,93 @@ +namespace SIL.Machine.AspNetCore.Services; + +[TestFixture] +public class ModelCleanupServiceTests +{ + private readonly ISharedFileService _sharedFileService = new SharedFileService(Substitute.For()); + private readonly MemoryRepository _engines = new MemoryRepository(); + private static readonly List validFiles = ["models/engineId1_1.tar.gz", "models/engineId2_2.tar.gz"]; + private static readonly List invalidFiles = + [ + "models/engineId2_1.targ.gz", // old build number + "models/worngId_1.tar.gz", + "models/engineId1_badbuildnumber.tar.gz", + "models/noBuildNumber.tar.gz", + "models/engineId1_1.differentExtension" + ]; + + private async Task SetUpAsync() + { + _engines.Add( + new TranslationEngine + { + Id = "engine1", + EngineId = "engineId1", + SourceLanguage = "es", + TargetLanguage = "en", + BuildRevision = 1, + IsModelPersisted = true + } + ); + _engines.Add( + new TranslationEngine + { + Id = "engine2", + EngineId = "engineId2", + SourceLanguage = "es", + TargetLanguage = "en", + BuildRevision = 2, + IsModelPersisted = true + } + ); + async Task WriteFileStub(string path, string content) + { + using StreamWriter streamWriter = + new(await _sharedFileService.OpenWriteAsync(path, CancellationToken.None)); + await streamWriter.WriteAsync(content); + } + foreach (string path in validFiles) + { + await WriteFileStub(path, "content"); + } + foreach (string path in invalidFiles) + { + await WriteFileStub(path, "content"); + } + } + + public class TestModelCleanupService( + IServiceProvider serviceProvider, + ISharedFileService sharedFileService, + IRepository engines, + ILogger logger + ) : ModelCleanupService(serviceProvider, sharedFileService, engines, logger) + { + public async Task DoWorkAsync() => + await base.DoWorkAsync(Substitute.For(), CancellationToken.None); + } + + [Test] + public async Task DoWorkAsync_ValidFiles() + { + await SetUpAsync(); + + var cleanupJob = new TestModelCleanupService( + Substitute.For(), + _sharedFileService, + _engines, + Substitute.For>() + ); + await cleanupJob.DoWorkAsync(); + // both valid and invalid files still exist after running once + Assert.That( + _sharedFileService.ListFilesAsync("models").Result.ToHashSet(), + Is.EquivalentTo(validFiles.Concat(invalidFiles).ToHashSet()) + ); + await cleanupJob.DoWorkAsync(); + // only valid files exist after running twice + Assert.That( + _sharedFileService.ListFilesAsync("models").Result.ToHashSet(), + Is.EquivalentTo(validFiles.ToHashSet()) + ); + } +} diff --git a/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs b/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs index 0794a914e..b1c861527 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Services/NmtEngineServiceTests.cs @@ -204,7 +204,8 @@ private NmtEngineService CreateService() Engines, BuildJobService, new LanguageTagService(), - ClearMLMonitorService + ClearMLMonitorService, + SharedFileService ); } diff --git a/tests/SIL.Machine.AspNetCore.Tests/Usings.cs b/tests/SIL.Machine.AspNetCore.Tests/Usings.cs index 222a7a747..4aafc901d 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/Usings.cs +++ b/tests/SIL.Machine.AspNetCore.Tests/Usings.cs @@ -3,6 +3,7 @@ global using System.Text.Json.Nodes; global using Hangfire; global using Hangfire.Storage; +global using Microsoft.Extensions.DependencyInjection; global using Microsoft.Extensions.Hosting; global using Microsoft.Extensions.Hosting.Internal; global using Microsoft.Extensions.Logging;