Skip to content

Commit

Permalink
Add NLLB-200 language code checking support
Browse files Browse the repository at this point in the history
Add SMT stub support

Reviewer Comments

IsSupportedNatively

InternalCode

UpdatedName

Optional Parameters for language info

Minor fixes

reviewer comments

Updates from review comments

Reviewer comments
  • Loading branch information
johnml1135 committed Jan 30, 2024
1 parent d70813c commit 1fa22b7
Show file tree
Hide file tree
Showing 13 changed files with 340 additions and 138 deletions.
1 change: 1 addition & 0 deletions src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<ProjectReference Include="..\SIL.Machine\SIL.Machine.csproj" />
<ProjectReference Include="..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj" Condition="Exists('..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj')" />
<ProjectReference Include="..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj" Condition="Exists('..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj')" />
<EmbeddedResource Include="data\flores200languages.csv" />
</ItemGroup>

<Target Name="ZipThotNewModel" BeforeTargets="BeforeBuild">
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.AspNetCore/Services/ILanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

/// <summary>
/// Converts language tags into Flores 200 language codes.
/// </summary>
public interface ILanguageTagService
{
string ConvertToFlores200Code(string languageTag);
/// <summary>
/// Converts a language tag to a Flores 200 code.
/// </summary>
/// <param name="languageTag">The language tag to convert.</param>
/// <param name="flores200Code">Receives the converted code.</param>
/// <returns>
/// In the <c>LanguageTagService</c> implementation, <c>true</c> when the converted code is
/// present in the Flores 200 language list; otherwise <c>false</c>.
/// </returns>
bool ConvertToFlores200Code(string languageTag, out string flores200Code);
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ Task StartBuildAsync(
Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default);

Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default);

bool IsLanguageNativeToModel(string language, out string internalCode);
}
38 changes: 37 additions & 1 deletion src/SIL.Machine.AspNetCore/Services/LanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ public class LanguageTagService : ILanguageTagService

private readonly Dictionary<string, string> _defaultScripts;

private readonly Dictionary<string, string> _flores200Languages;

private static readonly Regex LangTagPattern = new Regex(
"(?'language'[a-zA-Z]{2,8})([_-](?'script'[a-zA-Z]{4}))?",
RegexOptions.ExplicitCapture
Expand All @@ -16,6 +18,7 @@ public LanguageTagService()
{
// initialise SLDR language tags to retrieve latest langtags.json file
_defaultScripts = InitializeDefaultScripts();
_flores200Languages = InitializeFlores200Languages();
}

private static Dictionary<string, string> InitializeDefaultScripts()
Expand Down Expand Up @@ -56,7 +59,40 @@ private static Dictionary<string, string> InitializeDefaultScripts()
return tempDefaultScripts;
}

public string ConvertToFlores200Code(string languageTag)
/// <summary>
/// Loads the Flores 200 language table from the embedded
/// <c>flores200languages.csv</c> resource.
/// </summary>
/// <returns>
/// A dictionary keyed by the second CSV column (the code) whose value is the
/// first CSV column (the language); both are trimmed of surrounding whitespace.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The embedded resource is missing, or a non-blank row has fewer than two columns.
/// </exception>
private static Dictionary<string, string> InitializeFlores200Languages()
{
    const string resourceName = "SIL.Machine.AspNetCore.data.flores200languages.csv";

    using Stream? floresStream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);
    // Debug.Assert is compiled out of Release builds, so fail explicitly when the resource is absent.
    if (floresStream is null)
        throw new InvalidOperationException($"Embedded resource '{resourceName}' was not found.");

    // Dispose the reader as well as the stream.
    using var reader = new StreamReader(floresStream);

    // The first line is the column header, not data.
    string? firstLine = reader.ReadLine();
    Debug.Assert(firstLine == "language, code");

    var tempFlores200Languages = new Dictionary<string, string>();
    // ReadLine returns null at end of stream, so no separate EndOfStream check is needed.
    while (reader.ReadLine() is { } line)
    {
        // A trailing or stray blank line must not crash service start-up.
        if (string.IsNullOrWhiteSpace(line))
            continue;

        string[] values = line.Split(',');
        if (values.Length < 2)
            throw new InvalidOperationException($"Malformed row in '{resourceName}': '{line}'.");

        tempFlores200Languages[values[1].Trim()] = values[0].Trim();
    }
    return tempFlores200Languages;
}

/// <summary>
/// Resolves a language tag to a Flores 200 code and reports whether that code is in the Flores 200 list.
/// </summary>
/// <param name="languageTag">The language tag to convert.</param>
/// <param name="flores200Code">
/// Receives the resolved code. It is assigned whether or not the code is in the Flores 200 list.
/// </param>
/// <returns>
/// <c>true</c> if the resolved code is a key of the Flores 200 language table; otherwise <c>false</c>.
/// </returns>
public bool ConvertToFlores200Code(string languageTag, out string flores200Code)
{
    string resolvedCode = ResolveLanguageTag(languageTag);
    flores200Code = resolvedCode;
    return _flores200Languages.ContainsKey(resolvedCode);
}

private string ResolveLanguageTag(string languageTag)
{
// Try to find a pattern of {language code}_{script}
Match langTagMatch = LangTagPattern.Match(languageTag);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ public async Task<string> CreateJobScriptAsync(
Uri sharedFileUri = _sharedFileService.GetBaseUri();
string baseUri = sharedFileUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped);
string folder = sharedFileUri.GetComponents(UriComponents.Path, UriFormat.Unescaped);
_languageTagService.ConvertToFlores200Code(engine.SourceLanguage, out string srcLang);
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage, out string trgLang);
return "from machine.jobs.build_nmt_engine import run\n"
+ "args = {\n"
+ $" 'model_type': '{_options.CurrentValue.ModelType}',\n"
+ $" 'engine_id': '{engineId}',\n"
+ $" 'build_id': '{buildId}',\n"
+ $" 'src_lang': '{_languageTagService.ConvertToFlores200Code(engine.SourceLanguage)}',\n"
+ $" 'trg_lang': '{_languageTagService.ConvertToFlores200Code(engine.TargetLanguage)}',\n"
+ $" 'src_lang': '{srcLang}',\n"
+ $" 'trg_lang': '{trgLang}',\n"
+ $" 'shared_file_uri': '{baseUri}',\n"
+ $" 'shared_file_folder': '{folder}',\n"
+ (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "")
Expand Down
45 changes: 21 additions & 24 deletions src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,23 @@ public static class NmtBuildStages
public const string Postprocess = "postprocess";
}

public class NmtEngineService : ITranslationEngineService
public class NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ILanguageTagService languageTagService,
ClearMLMonitorService clearMLMonitorService
) : ITranslationEngineService
{
private readonly IDistributedReaderWriterLockFactory _lockFactory;
private readonly IPlatformService _platformService;
private readonly IDataAccessContext _dataAccessContext;
private readonly IRepository<TranslationEngine> _engines;
private readonly IBuildJobService _buildJobService;
private readonly ClearMLMonitorService _clearMLMonitorService;

public NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ClearMLMonitorService clearMLMonitorService
)
{
_lockFactory = lockFactory;
_platformService = platformService;
_dataAccessContext = dataAccessContext;
_engines = engines;
_buildJobService = buildJobService;
_clearMLMonitorService = clearMLMonitorService;
}
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;
private readonly IPlatformService _platformService = platformService;
private readonly IDataAccessContext _dataAccessContext = dataAccessContext;
private readonly IRepository<TranslationEngine> _engines = engines;
private readonly IBuildJobService _buildJobService = buildJobService;
private readonly ILanguageTagService _languageTagService = languageTagService;
private readonly ClearMLMonitorService _clearMLMonitorService = clearMLMonitorService;

public TranslationEngineType Type => TranslationEngineType.Nmt;

Expand Down Expand Up @@ -151,6 +143,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(_clearMLMonitorService.QueueSize);
}

/// <summary>
/// Reports whether <paramref name="language"/> maps to a code in the Flores 200 list,
/// by delegating to the language tag service.
/// </summary>
/// <param name="language">The language tag to check.</param>
/// <param name="internalCode">Receives the code produced by the language tag service.</param>
/// <returns>The result of <c>ConvertToFlores200Code</c> on the language tag service.</returns>
public bool IsLanguageNativeToModel(string language, out string internalCode) =>
    _languageTagService.ConvertToFlores200Code(language, out internalCode);

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
13 changes: 5 additions & 8 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,11 @@ CancellationToken cancellationToken
TranslationEngine? engine = await Engines.GetAsync(e => e.EngineId == engineId, cancellationToken);
if (engine is null)
throw new OperationCanceledException($"Engine {engineId} does not exist. Build canceled.");
_buildPreprocessSummary.Add(
"SourceLanguageResolved",
_languageTagService.ConvertToFlores200Code(engine.SourceLanguage)
);
_buildPreprocessSummary.Add(
"TargetLanguageResolved",
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage)
);

_languageTagService.ConvertToFlores200Code(engine.SourceLanguage, out string srcLang);
_buildPreprocessSummary.Add("SourceLanguageResolved", srcLang);
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage, out string trgLang);
_buildPreprocessSummary.Add("TargetLanguageResolved", trgLang);
Logger.LogInformation("{summary}", _buildPreprocessSummary.ToJsonString());

await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,16 @@

namespace SIL.Machine.AspNetCore.Services;

public class ServalTranslationEngineServiceV1 : TranslationEngineApi.TranslationEngineApiBase
public class ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
) : TranslationEngineApi.TranslationEngineApiBase
{
private static readonly Empty Empty = new();

private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices;
private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices = engineServices.ToDictionary(es => es.Type);

private readonly HealthCheckService _healthCheckService;

public ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
)
{
_engineServices = engineServices.ToDictionary(es => es.Type);
_healthCheckService = healthCheckService;
}
private readonly HealthCheckService _healthCheckService = healthCheckService;

public override async Task<Empty> Create(CreateRequest request, ServerCallContext context)
{
Expand Down Expand Up @@ -133,6 +127,22 @@ ServerCallContext context
return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) };
}

/// <summary>
/// Handles the GetLanguageInfo call: looks up the engine service for
/// <c>request.EngineType</c>, asks it whether <c>request.Language</c> is native to the
/// model, and returns an already-completed task carrying the internal code and the
/// native flag.
/// </summary>
/// <param name="request">Supplies the engine type and the language to check.</param>
/// <param name="context">The server call context; not read by this method.</param>
/// <returns>A completed task whose result has <c>InternalCode</c> and <c>IsNative</c> set.</returns>
/// <remarks>
/// NOTE(review): the SMT transfer implementation of <c>IsLanguageNativeToModel</c> throws
/// <c>NotSupportedException</c>; presumably that propagates from here for SMT engines — confirm.
/// </remarks>
public override Task<GetLanguageInfoResponse> GetLanguageInfo(

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 130 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoResponse' could not be found (are you missing a using directive or an assembly reference?)
GetLanguageInfoRequest request,

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 131 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'GetLanguageInfoRequest' could not be found (are you missing a using directive or an assembly reference?)
ServerCallContext context
)
{
ITranslationEngineService engineService = GetEngineService(request.EngineType);
bool isNative = engineService.IsLanguageNativeToModel(request.Language, out string internalCode);
return Task.FromResult(
new GetLanguageInfoResponse
{
InternalCode = internalCode,
IsNative = isNative,
}
);
}

public override async Task<HealthCheckResponse> HealthCheck(Empty request, ServerCallContext context)
{
HealthReport healthReport = await _healthCheckService.CheckHealthAsync();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(Convert.ToInt32(_jobStorage.GetMonitoringApi().EnqueuedCount("smt_transfer")));
}

/// <summary>
/// Language info is not available for this engine type; this method always throws.
/// </summary>
/// <param name="language">Unused.</param>
/// <param name="internalCode">Never assigned, because the method always throws.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public bool IsLanguageNativeToModel(string language, out string internalCode) =>
    throw new NotSupportedException("SMT transfer engines do not support language info.");

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
Loading

0 comments on commit 1fa22b7

Please sign in to comment.