Skip to content

Commit

Permalink
updates from reviewer feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Feb 7, 2024
1 parent 8d6702d commit 55ebe3e
Show file tree
Hide file tree
Showing 17 changed files with 150 additions and 125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,8 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder)

public static IMachineBuilder AddModelCleanupService(this IMachineBuilder builder)
{
builder.Services.AddSingleton<CleanupOldModelsService>();
builder.Services.AddHostedService(p => p.GetRequiredService<CleanupOldModelsService>());
builder.Services.AddSingleton<ModelCleanupService>();
builder.Services.AddHostedService(p => p.GetRequiredService<ModelCleanupService>());
return builder;
}

Expand Down
79 changes: 0 additions & 79 deletions src/SIL.Machine.AspNetCore/Services/CleanupOldModelsService.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/SIL.Machine.AspNetCore/Services/IFileStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Task<IReadOnlyCollection<string>> ListFilesAsync(

Task<Stream> OpenWriteAsync(string path, CancellationToken cancellationToken = default);

Task<string> GetPresignedUrlAsync(string path, int minutesToExpire, CancellationToken cancellationToken = default);
Task<string> GetDownloadUrlAsync(string path, DateTime expiresAt, CancellationToken cancellationToken = default);

Task DeleteAsync(string path, bool recurse = false, CancellationToken cancellationToken = default);
}
4 changes: 1 addition & 3 deletions src/SIL.Machine.AspNetCore/Services/ISharedFileService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

public interface ISharedFileService
{
public const string ModelDirectory = "models/";

Uri GetBaseUri();

Uri GetResolvedUri(string path);

Task<Uri> GetPresignedUrlAsync(string path, int minutesToExpire);
Task<string> GetDownloadUrlAsync(string path, DateTime expiresAt);

Task<IReadOnlyCollection<string>> ListFilesAsync(
string path,
Expand Down
6 changes: 3 additions & 3 deletions src/SIL.Machine.AspNetCore/Services/InMemoryStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,13 +96,13 @@ public Task<IReadOnlyCollection<string>> ListFilesAsync(
);
}

public Task<string> GetPresignedUrlAsync(
public Task<string> GetDownloadUrlAsync(
string path,
int minutesToExpire,
DateTime expiresAt,
CancellationToken cancellationToken = default
)
{
return Task.FromResult(path);
throw new NotSupportedException();
}

public Task<Stream> OpenReadAsync(string path, CancellationToken cancellationToken = default)
Expand Down
6 changes: 3 additions & 3 deletions src/SIL.Machine.AspNetCore/Services/LocalStorage.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ public Task<IReadOnlyCollection<string>> ListFilesAsync(
);
}

public Task<string> GetPresignedUrlAsync(
public Task<string> GetDownloadUrlAsync(
string path,
int minutesToExpire,
DateTime expiresAt,
CancellationToken cancellationToken = default
)
{
return Task.FromResult(path);
throw new NotSupportedException();
}

public Task<Stream> OpenReadAsync(string path, CancellationToken cancellationToken = default)
Expand Down
100 changes: 100 additions & 0 deletions src/SIL.Machine.AspNetCore/Services/ModelCleanupService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
namespace SIL.Machine.AspNetCore.Services;

public class ModelCleanupService(
IServiceProvider services,
ISharedFileService sharedFileService,
IRepository<TranslationEngine> engines,
ILogger<ModelCleanupService> logger
) : RecurrentTask("Model Cleanup Service", services, RefreshPeriod, logger)
{
private ISharedFileService SharedFileService { get; } = sharedFileService;
private ILogger<ModelCleanupService> _logger = logger;
private IRepository<TranslationEngine> _engines = engines;
private List<string> _filesPreviouslyMarkedForDeletion = [];
private readonly List<string> _filesNewlyMarkedForDeletion = [];
private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(10);

protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken)
{
await CheckModelsAsync(cancellationToken);
}

private async Task CheckModelsAsync(CancellationToken cancellationToken)
{
_logger.LogInformation("Running model cleanup job");
IReadOnlyCollection<string> paths = await SharedFileService.ListFilesAsync(
NmtEngineService.ModelDirectory,
cancellationToken: cancellationToken
);
// Get all engine ids from the database
Dictionary<string, int> engineIdsToRevision = _engines
.GetAllAsync(cancellationToken: cancellationToken)
.Result.Select(e => (e.EngineId, e.BuildRevision))
.ToDictionary<string, int>();

foreach (string path in paths)
{
string filename = Path.GetFileName(path);
string filenameWithoutExtensions = filename.Split(".")[0];
string extension = filename[^(filename.Length - filenameWithoutExtensions.Length)..];
if (extension != ".tar.gz")
{
await DeleteFileAsync(
path,
$"filename has to have .tar.gz extension, instead has {extension}",
cancellationToken
);
continue;
}
string[] parts = filenameWithoutExtensions.Split("_");
if (parts.Length != 2)
{
await DeleteFileAsync(
path,
$"filename has to have one underscore, instead has {parts.Length - 1}",
cancellationToken
);
continue;
}
string engineId = parts[0];
if (!engineIdsToRevision.ContainsKey(engineId))
{
await DeleteFileAsync(path, $"engine {engineId} does not exist in the database.", cancellationToken);
continue;
}
if (!int.TryParse(parts[1], out int parsedBuildRevision))
{
await DeleteFileAsync(
path,
$"cannot parse build revision from {parts[1]} for engine {engineId}",
cancellationToken
);
continue;
}
if (engineIdsToRevision[engineId] > parsedBuildRevision)
await DeleteFileAsync(
path,
$"build revision {parsedBuildRevision} is older than the current build revision {engineIdsToRevision[engineId]}",
cancellationToken
);
}
// roll over the list of files previously marked for deletion
_filesPreviouslyMarkedForDeletion = new List<string>(_filesNewlyMarkedForDeletion);
_filesNewlyMarkedForDeletion.Clear();
}

private async Task DeleteFileAsync(string filename, string message, CancellationToken cancellationToken = default)
{
// If a file has been requested to be deleted twice, delete it. Otherwise, mark it for deletion.
if (_filesPreviouslyMarkedForDeletion.Contains(filename))
{
_logger.LogInformation("Deleting old model file {filename}: {message}", filename, message);
await SharedFileService.DeleteAsync(filename, cancellationToken);
}
else
{
_logger.LogInformation("Marking old model file {filename} for deletion: {message}", filename, message);
_filesNewlyMarkedForDeletion.Add(filename);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ public async Task<string> CreateJobScriptAsync(
+ $" 'shared_file_uri': '{baseUri}',\n"
+ $" 'shared_file_folder': '{folder}',\n"
+ (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "")
+ (engine.IsModelPersisted ? $" 'save_model': '{engineId}_{engine.BuildRevision + 1}',\n" : "")
// buildRevision + 1 because the build revision is incremented after the build job
// is finished successfully but the file should be saved with the new revision number
+ (engine.IsModelPersisted ? $" 'save_model': '{engineId}_{engine.BuildRevision + 1}',\n" : $"")
+ $" 'clearml': True\n"
+ "}\n"
+ "run(args)\n";
Expand Down
16 changes: 7 additions & 9 deletions src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ ISharedFileService sharedFileService
private readonly ILanguageTagService _languageTagService = languageTagService;
private readonly ISharedFileService _sharedFileService = sharedFileService;

public const string ModelDirectory = "models/";

public TranslationEngineType Type => TranslationEngineType.Nmt;

private const int MinutesToExpire = 60;
Expand Down Expand Up @@ -121,31 +123,27 @@ public async Task<ModelDownloadUrl> GetModelDownloadUrlAsync(
{
TranslationEngine engine = await GetEngineAsync(engineId, cancellationToken);
if (!engine.IsModelPersisted)
throw new InvalidOperationException(
throw new NotSupportedException(
"The model cannot be downloaded. "
+ "To enable downloading the model, recreate the engine with IsModelPersisted property to true."
);
if (engine.BuildRevision == 0)
throw new InvalidOperationException("The engine has not been built yet.");
string filename = $"{engineId}_{engine.BuildRevision}.tar.gz";
bool fileExists = await _sharedFileService.ExistsAsync(
ISharedFileService.ModelDirectory + filename,
NmtEngineService.ModelDirectory + filename,
cancellationToken
);
if (!fileExists)
throw new FileNotFoundException(
$"The model should exist to be downloaded but is not there for BuildRevision {engine.BuildRevision}."
);
var expiresAt = DateTime.UtcNow.AddMinutes(MinutesToExpire);
var modelInfo = new ModelDownloadUrl
{
Url = (
await _sharedFileService.GetPresignedUrlAsync(
ISharedFileService.ModelDirectory + filename,
MinutesToExpire
)
).ToString(),
Url = await _sharedFileService.GetDownloadUrlAsync(NmtEngineService.ModelDirectory + filename, expiresAt),
ModelRevision = engine.BuildRevision,
ExipiresAt = DateTime.UtcNow.AddMinutes(MinutesToExpire)
ExipiresAt = expiresAt
};
return modelInfo;
}
Expand Down
2 changes: 2 additions & 0 deletions src/SIL.Machine.AspNetCore/Services/NmtTrainBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ await PipInstallModuleAsync(
+ $" 'trg_lang': '{ConvertLanguageTag(engine.TargetLanguage)}',\n"
+ $" 'shared_file_uri': '{_sharedFileService.GetBaseUri()}',\n"
+ (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "")
// buildRevision + 1 because the build revision is incremented after the build job
// is finished successfully but the file should be saved with the new revision number
+ (
engine.IsModelPersisted
? $" 'save_model': '{engine.Id}_{engine.BuildRevision + 1}',\n"
Expand Down
8 changes: 5 additions & 3 deletions src/SIL.Machine.AspNetCore/Services/S3FileStorage.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using static SIL.Machine.AspNetCore.Utils.SharedFileUtils;

namespace SIL.Machine.AspNetCore.Services;

public class S3FileStorage : DisposableBase, IFileStorage
Expand Down Expand Up @@ -63,9 +65,9 @@ public async Task<IReadOnlyCollection<string>> ListFilesAsync(
return response.S3Objects.Select(s3Obj => s3Obj.Key[_basePath.Length..]).ToList();
}

public Task<string> GetPresignedUrlAsync(
public Task<string> GetDownloadUrlAsync(
string path,
int minutesToExpire,
DateTime expiresAt,
CancellationToken cancellationToken = default
)
{
Expand All @@ -75,7 +77,7 @@ public Task<string> GetPresignedUrlAsync(
{
BucketName = _bucketName,
Key = _basePath + Normalize(path),
Expires = DateTime.UtcNow.AddMinutes(minutesToExpire),
Expires = expiresAt,
ResponseHeaderOverrides = new ResponseHeaderOverrides
{
ContentDisposition = new ContentDisposition() { FileName = Path.GetFileName(path) }.ToString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,6 @@ ServerCallContext context
{
throw new RpcException(new Status(StatusCode.Aborted, e.Message));
}
catch (FileNotFoundException e)
{
throw new RpcException(new Status(StatusCode.Aborted, e.Message));
}
}

public override async Task<GetQueueSizeResponse> GetQueueSize(
Expand Down
9 changes: 2 additions & 7 deletions src/SIL.Machine.AspNetCore/Services/SharedFileService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,9 @@ public Uri GetResolvedUri(string path)
return new Uri(_baseUri, path);
}

public async Task<Uri> GetPresignedUrlAsync(string path, int minutesToExpire)
public async Task<string> GetDownloadUrlAsync(string path, DateTime expiresAt)
{
string presignedUrl = path;
if (_baseUri is not null)
if (_baseUri.Scheme == "s3")
presignedUrl = await _fileStorage.GetPresignedUrlAsync(path, minutesToExpire);
var url = GetResolvedUri(presignedUrl);
return url;
return await _fileStorage.GetDownloadUrlAsync(path, expiresAt);
}

public Task<IReadOnlyCollection<string>> ListFilesAsync(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public async Task CreateAsync(
string? engineName,
string sourceLanguage,
string targetLanguage,
bool isModelPersisted = false,
bool isModelPersisted = true,
CancellationToken cancellationToken = default
)
{
Expand All @@ -43,7 +43,7 @@ await _engines.InsertAsync(
EngineId = engineId,
SourceLanguage = sourceLanguage,
TargetLanguage = targetLanguage,
IsModelPersisted = isModelPersisted
IsModelPersisted = true // SMT transfer engines are always persisted
},
cancellationToken
);
Expand Down Expand Up @@ -220,7 +220,7 @@ public Task<ModelDownloadUrl> GetModelDownloadUrlAsync(
CancellationToken cancellationToken = default
)
{
throw new NotImplementedException();
throw new NotSupportedException();
}

private async Task<TranslationEngine> GetEngineAsync(string engineId, CancellationToken cancellationToken)
Expand Down
Loading

0 comments on commit 55ebe3e

Please sign in to comment.