Skip to content

Commit

Permalink
Language endpoint (#159)
Browse files Browse the repository at this point in the history
* Add NLLB-200 language code checking support

Add SMT stub support

Reviewer Comments

IsSupportedNatively

InternalCode

UpdatedName

Optional Parameters for language info

Minor fixes

reviewer comments

Updates from review comments

Reviewer comments

* fix Korean script for NLLB: sillsdev/serval#290

* Update GRPC from Serval.

* Fix tests

* Fix formatting.
  • Loading branch information
johnml1135 authored Jan 31, 2024
1 parent 8e7023c commit 5a2142c
Show file tree
Hide file tree
Showing 13 changed files with 356 additions and 139 deletions.
3 changes: 2 additions & 1 deletion src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.16" />
<PackageReference Include="Microsoft.Extensions.Http.Polly" Version="6.0.14" />
<PackageReference Include="Python.Included" Version="3.11.4" />
<PackageReference Include="Serval.Grpc" Version="0.13.0" Condition="!Exists('..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj')" />
<PackageReference Include="Serval.Grpc" Version="0.14.0" Condition="!Exists('..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj')" />
<PackageReference Include="SIL.DataAccess" Version="0.5.2" Condition="!Exists('..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj')" />
<PackageReference Include="SIL.WritingSystems" Version="12.0.1" />
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
Expand All @@ -47,6 +47,7 @@
<ProjectReference Include="..\SIL.Machine\SIL.Machine.csproj" />
<ProjectReference Include="..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj" Condition="Exists('..\..\..\serval\src\Serval.Grpc\Serval.Grpc.csproj')" />
<ProjectReference Include="..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj" Condition="Exists('..\..\..\serval\src\SIL.DataAccess\SIL.DataAccess.csproj')" />
<EmbeddedResource Include="data\flores200languages.csv" />
</ItemGroup>

<Target Name="ZipThotNewModel" BeforeTargets="BeforeBuild">
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.AspNetCore/Services/ILanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

public interface ILanguageTagService
{
string ConvertToFlores200Code(string languageTag);
/// <summary>
/// Converts a language tag to a Flores 200 code.
/// </summary>
/// <param name="languageTag">The language tag to convert.</param>
/// <param name="flores200Code">Receives the converted code.</param>
/// <returns>Whether the converted code is in the Flores 200 language list.</returns>
bool ConvertToFlores200Code(string languageTag, out string flores200Code);
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ Task StartBuildAsync(
Task CancelBuildAsync(string engineId, CancellationToken cancellationToken = default);

Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default);

/// <summary>
/// Reports whether <paramref name="language"/> is native to this engine's model.
/// </summary>
/// <param name="language">The language to look up.</param>
/// <param name="internalCode">Receives the code the engine uses internally for the language.</param>
/// <returns>The value surfaced as <c>IsNative</c> in the gRPC <c>GetLanguageInfo</c> response.</returns>
/// <exception cref="NotSupportedException">
/// Thrown by engine types that do not provide language info (the SMT transfer engine does this).
/// </exception>
bool IsLanguageNativeToModel(string language, out string internalCode);
}
44 changes: 43 additions & 1 deletion src/SIL.Machine.AspNetCore/Services/LanguageTagService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ public class LanguageTagService : ILanguageTagService
{ "cmn", "zh" }
};

private static readonly Dictionary<string, string> StandardScripts = new() { { "Kore", "Hang" } };

private readonly Dictionary<string, string> _defaultScripts;

private readonly Dictionary<string, string> _flores200Languages;

private static readonly Regex LangTagPattern = new Regex(
"(?'language'[a-zA-Z]{2,8})([_-](?'script'[a-zA-Z]{4}))?",
RegexOptions.ExplicitCapture
Expand All @@ -24,6 +28,7 @@ public LanguageTagService()
{
// initialise SLDR language tags to retrieve latest langtags.json file
_defaultScripts = InitializeDefaultScripts();
_flores200Languages = InitializeFlores200Languages();
}

private static Dictionary<string, string> InitializeDefaultScripts()
Expand Down Expand Up @@ -66,7 +71,40 @@ t is not null
return tempDefaultScripts;
}

public string ConvertToFlores200Code(string languageTag)
/// <summary>
/// Loads the Flores 200 language list from the embedded <c>flores200languages.csv</c> resource.
/// </summary>
/// <returns>
/// A dictionary keyed by the second CSV column (code) whose values are the first CSV column (language).
/// Both columns are trimmed of surrounding whitespace.
/// </returns>
/// <exception cref="InvalidOperationException">The embedded resource cannot be found in this assembly.</exception>
private static Dictionary<string, string> InitializeFlores200Languages()
{
    const string resourceName = "SIL.Machine.AspNetCore.data.flores200languages.csv";

    using Stream? floresStream = Assembly.GetExecutingAssembly().GetManifestResourceStream(resourceName);
    // Debug.Assert is compiled out of release builds, so fail with a descriptive error
    // rather than letting the StreamReader constructor throw on a null stream.
    if (floresStream is null)
        throw new InvalidOperationException($"Embedded resource '{resourceName}' was not found.");

    using var reader = new StreamReader(floresStream);

    // The first row is the column header, not data.
    string? firstLine = reader.ReadLine();
    Debug.Assert(firstLine == "language, code");

    var flores200Languages = new Dictionary<string, string>();
    while (reader.ReadLine() is { } line)
    {
        string[] values = line.Split(',');
        // Skip blank or malformed rows (e.g. a trailing newline) instead of indexing past the array.
        if (values.Length < 2)
            continue;
        flores200Languages[values[1].Trim()] = values[0].Trim();
    }
    return flores200Languages;
}

/// <summary>
/// Converts a language tag to a Flores 200 code.
/// </summary>
/// <param name="languageTag">The language tag to convert.</param>
/// <param name="flores200Code">
/// Receives the code produced by <c>ResolveLanguageTag</c>; it is assigned whether or not
/// that code appears in the Flores 200 list.
/// </param>
/// <returns>
/// <c>true</c> if the resolved code is a key in the loaded Flores 200 language list; otherwise <c>false</c>.
/// </returns>
public bool ConvertToFlores200Code(string languageTag, out string flores200Code)
{
    string resolvedCode = ResolveLanguageTag(languageTag);
    flores200Code = resolvedCode;
    return _flores200Languages.ContainsKey(resolvedCode);
}

private string ResolveLanguageTag(string languageTag)
{
// Try to find a pattern of {language code}_{script}
Match langTagMatch = LangTagPattern.Match(languageTag);
Expand Down Expand Up @@ -101,6 +139,10 @@ public string ConvertToFlores200Code(string languageTag)
else if (_defaultScripts.TryGetValue(languageSubtag, out string? tempScript))
script = tempScript;

// There are a few extra conversions not in SIL Writing Systems that we need to handle
if (script is not null && StandardScripts.TryGetValue(script, out string? tempScript3))
script = tempScript3;

if (script is not null)
return $"{iso639_3Code}_{script}";
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ public async Task<string> CreateJobScriptAsync(
Uri sharedFileUri = _sharedFileService.GetBaseUri();
string baseUri = sharedFileUri.GetComponents(UriComponents.SchemeAndServer, UriFormat.Unescaped);
string folder = sharedFileUri.GetComponents(UriComponents.Path, UriFormat.Unescaped);
_languageTagService.ConvertToFlores200Code(engine.SourceLanguage, out string srcLang);
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage, out string trgLang);
return "from machine.jobs.build_nmt_engine import run\n"
+ "args = {\n"
+ $" 'model_type': '{_options.CurrentValue.ModelType}',\n"
+ $" 'engine_id': '{engineId}',\n"
+ $" 'build_id': '{buildId}',\n"
+ $" 'src_lang': '{_languageTagService.ConvertToFlores200Code(engine.SourceLanguage)}',\n"
+ $" 'trg_lang': '{_languageTagService.ConvertToFlores200Code(engine.TargetLanguage)}',\n"
+ $" 'src_lang': '{srcLang}',\n"
+ $" 'trg_lang': '{trgLang}',\n"
+ $" 'shared_file_uri': '{baseUri}',\n"
+ $" 'shared_file_folder': '{folder}',\n"
+ (buildOptions is not null ? $" 'build_options': '''{buildOptions}''',\n" : "")
Expand Down
45 changes: 21 additions & 24 deletions src/SIL.Machine.AspNetCore/Services/NmtEngineService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,23 @@ public static class NmtBuildStages
public const string Postprocess = "postprocess";
}

public class NmtEngineService : ITranslationEngineService
public class NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ILanguageTagService languageTagService,
ClearMLMonitorService clearMLMonitorService
) : ITranslationEngineService
{
private readonly IDistributedReaderWriterLockFactory _lockFactory;
private readonly IPlatformService _platformService;
private readonly IDataAccessContext _dataAccessContext;
private readonly IRepository<TranslationEngine> _engines;
private readonly IBuildJobService _buildJobService;
private readonly ClearMLMonitorService _clearMLMonitorService;

public NmtEngineService(
IPlatformService platformService,
IDistributedReaderWriterLockFactory lockFactory,
IDataAccessContext dataAccessContext,
IRepository<TranslationEngine> engines,
IBuildJobService buildJobService,
ClearMLMonitorService clearMLMonitorService
)
{
_lockFactory = lockFactory;
_platformService = platformService;
_dataAccessContext = dataAccessContext;
_engines = engines;
_buildJobService = buildJobService;
_clearMLMonitorService = clearMLMonitorService;
}
private readonly IDistributedReaderWriterLockFactory _lockFactory = lockFactory;
private readonly IPlatformService _platformService = platformService;
private readonly IDataAccessContext _dataAccessContext = dataAccessContext;
private readonly IRepository<TranslationEngine> _engines = engines;
private readonly IBuildJobService _buildJobService = buildJobService;
private readonly ILanguageTagService _languageTagService = languageTagService;
private readonly ClearMLMonitorService _clearMLMonitorService = clearMLMonitorService;

public TranslationEngineType Type => TranslationEngineType.Nmt;

Expand Down Expand Up @@ -151,6 +143,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(_clearMLMonitorService.QueueSize);
}

/// <summary>
/// Delegates to the language tag service's Flores 200 conversion for <paramref name="language"/>.
/// </summary>
/// <param name="language">The language to look up.</param>
/// <param name="internalCode">Receives the code output by the Flores 200 conversion.</param>
/// <returns>The result of <c>ConvertToFlores200Code</c> for the given language.</returns>
public bool IsLanguageNativeToModel(string language, out string internalCode) =>
    _languageTagService.ConvertToFlores200Code(language, out internalCode);

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
13 changes: 5 additions & 8 deletions src/SIL.Machine.AspNetCore/Services/NmtPreprocessBuildJob.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,11 @@ CancellationToken cancellationToken
TranslationEngine? engine = await Engines.GetAsync(e => e.EngineId == engineId, cancellationToken);
if (engine is null)
throw new OperationCanceledException($"Engine {engineId} does not exist. Build canceled.");
_buildPreprocessSummary.Add(
"SourceLanguageResolved",
_languageTagService.ConvertToFlores200Code(engine.SourceLanguage)
);
_buildPreprocessSummary.Add(
"TargetLanguageResolved",
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage)
);

_languageTagService.ConvertToFlores200Code(engine.SourceLanguage, out string srcLang);
_buildPreprocessSummary.Add("SourceLanguageResolved", srcLang);
_languageTagService.ConvertToFlores200Code(engine.TargetLanguage, out string trgLang);
_buildPreprocessSummary.Add("TargetLanguageResolved", trgLang);
Logger.LogInformation("{summary}", _buildPreprocessSummary.ToJsonString());

await using (await @lock.WriterLockAsync(cancellationToken: cancellationToken))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,17 @@

namespace SIL.Machine.AspNetCore.Services;

public class ServalTranslationEngineServiceV1 : TranslationEngineApi.TranslationEngineApiBase
public class ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
) : TranslationEngineApi.TranslationEngineApiBase
{
private static readonly Empty Empty = new();

private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices;
private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices =
engineServices.ToDictionary(es => es.Type);

private readonly HealthCheckService _healthCheckService;

public ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
)
{
_engineServices = engineServices.ToDictionary(es => es.Type);
_healthCheckService = healthCheckService;
}
private readonly HealthCheckService _healthCheckService = healthCheckService;

public override async Task<Empty> Create(CreateRequest request, ServerCallContext context)
{
Expand Down Expand Up @@ -133,6 +128,16 @@ ServerCallContext context
return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) };
}

/// <summary>
/// Looks up the engine service for the requested engine type and asks it about the requested language.
/// </summary>
/// <param name="request">Supplies the engine type and the language to look up.</param>
/// <param name="context">The gRPC call context (not used by this method).</param>
/// <returns>
/// A completed task whose response carries the engine's internal code and whether the language is native.
/// </returns>
public override Task<GetLanguageInfoResponse> GetLanguageInfo(
    GetLanguageInfoRequest request,
    ServerCallContext context
)
{
    ITranslationEngineService service = GetEngineService(request.EngineType);
    bool native = service.IsLanguageNativeToModel(request.Language, out string code);
    var response = new GetLanguageInfoResponse { InternalCode = code, IsNative = native };
    return Task.FromResult(response);
}

public override async Task<HealthCheckResponse> HealthCheck(Empty request, ServerCallContext context)
{
HealthReport healthReport = await _healthCheckService.CheckHealthAsync();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,11 @@ public Task<int> GetQueueSizeAsync(CancellationToken cancellationToken = default
return Task.FromResult(Convert.ToInt32(_jobStorage.GetMonitoringApi().EnqueuedCount("smt_transfer")));
}

/// <summary>
/// Not supported by this engine; always throws.
/// </summary>
/// <param name="language">Unused.</param>
/// <param name="internalCode">Never assigned, because the method always throws.</param>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public bool IsLanguageNativeToModel(string language, out string internalCode) =>
    throw new NotSupportedException("SMT transfer engines do not support language info.");

private async Task CancelBuildJobAsync(string engineId, CancellationToken cancellationToken)
{
(string? buildId, BuildJobState jobState) = await _buildJobService.CancelBuildJobAsync(
Expand Down
Loading

0 comments on commit 5a2142c

Please sign in to comment.