Skip to content

Commit

Permalink
fixed slow preprocessing due to non-cached/compiled regexes, fixed cr…
Browse files Browse the repository at this point in the history
…ash when deleting model with running pipes, fixed languages for language group models with no yaml file, fixed bug with Tatoeba model list parsing
  • Loading branch information
TommiNieminen committed Oct 5, 2022
1 parent 23aa68f commit dbcb08f
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 120 deletions.
2 changes: 2 additions & 0 deletions OpusCatMTEngine/App.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Net;
using System.Reflection;
using System.ServiceModel;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Threading;
Expand Down Expand Up @@ -119,6 +120,7 @@ private void Application_Startup(object sender, StartupEventArgs e)
this.InitializePythonEngine();

this.CheckForUpdatesAsync();

}

private async void CheckForUpdatesAsync()
Expand Down
63 changes: 47 additions & 16 deletions OpusCatMTEngine/MTModel.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ internal void Shutdown()
foreach (var langpair in this.marianProcesses.Keys)
{
this.marianProcesses[langpair].ShutdownMtPipe();
this.marianProcesses[langpair] = null;
//this.marianProcesses[langpair] = null;
}
}
this.marianProcesses = null;
Expand Down Expand Up @@ -285,13 +285,7 @@ internal void DownloadProgressChanged(object sender, DownloadProgressChangedEven
{
this.InstallProgress = e.ProgressPercentage;
}

/// <summary>
/// Target languages of the model. Plain read/write access to the
/// <c>targetLanguages</c> backing field; no change notification is raised.
/// </summary>
public List<IsoLanguage> TargetLanguages
{
    get { return targetLanguages; }
    set { targetLanguages = value; }
}


/// <summary>
/// Status of the model. The setter stores the value and then calls
/// <c>NotifyPropertyChanged()</c> without an explicit property name.
/// </summary>
public MTModelStatus Status
{
    get { return status; }
    set
    {
        status = value;
        NotifyPropertyChanged();
    }
}

//This creates a zip package of the model that can be moved to another computer
Expand Down Expand Up @@ -460,7 +454,25 @@ internal void ExitHandler(object sender, EventArgs e)
}
}

/// <summary>
/// Source languages of the model. Plain read/write access to the
/// <c>sourceLanguages</c> backing field; no change notification is raised.
/// </summary>
public List<IsoLanguage> SourceLanguages
{
    get { return sourceLanguages; }
    set { sourceLanguages = value; }
}
/// <summary>
/// Source languages of the model. Assigning a new list also raises a
/// property-changed notification for "SourceLanguageString", so anything
/// listening on that derived name is told to refresh.
/// </summary>
public List<IsoLanguage> SourceLanguages
{
    get { return sourceLanguages; }
    set
    {
        sourceLanguages = value;

        // The notification targets the derived string property, not this property itself.
        NotifyPropertyChanged("SourceLanguageString");
    }
}

/// <summary>
/// Target languages of the model. Assigning a new list also raises a
/// property-changed notification for "TargetLanguageString", so anything
/// listening on that derived name is told to refresh.
/// </summary>
public List<IsoLanguage> TargetLanguages
{
    get { return targetLanguages; }
    set
    {
        targetLanguages = value;

        // The notification targets the derived string property, not this property itself.
        NotifyPropertyChanged("TargetLanguageString");
    }
}

/// <summary>
/// Name of the model. Reads and writes the <c>name</c> backing field directly;
/// no change notification is raised.
/// </summary>
public string Name
{
    get { return name; }
    set { name = value; }
}

Expand Down Expand Up @@ -633,14 +645,23 @@ public MTModel(
this.UpdateModelYamlPath();

this.SupportsWordAlignment = this.decoderSettings.models[0].Contains("-align");

if (this.modelYaml == null)
{
this.ParseModelPathForLanguages(modelPath);
}

this.ParseModelConfig();

if (this.modelYaml == null)
{
if (this.modelConfig.SourceLanguageCodes == null ||
this.modelConfig.TargetLanguageCodes == null)
{
this.ParseModelPathForLanguages(modelPath);
}
else
{
this.SourceLanguages = this.modelConfig.SourceLanguageCodes.Select(x => new IsoLanguage(x)).ToList();
this.TargetLanguages = this.modelConfig.TargetLanguageCodes.Select(x => new IsoLanguage(x)).ToList();
}
}

//
this.AutoPreEditRuleCollections = new ObservableCollection<AutoEditRuleCollection>(
this.ModelConfig.AutoPreEditRuleCollectionGuids.Select(x => autoPreEditRuleCollections.SingleOrDefault(
Expand All @@ -654,6 +675,17 @@ public MTModel(

internal void SaveModelConfig()
{
if (this.ModelConfig == null)
{
this.ModelConfig = new MTModelConfig();
}

if (this.SourceLanguages != null && this.TargetLanguages != null)
{
this.ModelConfig.SourceLanguageCodes = new ObservableCollection<String>(this.SourceLanguages.Select(x => x.OriginalCode));
this.ModelConfig.TargetLanguageCodes = new ObservableCollection<String>(this.TargetLanguages.Select(x => x.OriginalCode));
}

//The directory might not exist yet in case of customized models (i.e. copying of the base model
//is not complete)
if (Directory.Exists(this.InstallDir))
Expand Down Expand Up @@ -797,7 +829,6 @@ public MTModel(
//This is used for online models, model uri is included for later download of models
public MTModel(string modelPath, Uri modelUri, string yamlString = null)
{

this.ModelPath = modelPath;
this.modelYaml = yamlString;
if (yamlString != null)
Expand Down Expand Up @@ -1006,7 +1037,7 @@ public string TargetCodesString
}

// Path identifying the model; only code inside this assembly may assign it (internal setter).
public string ModelPath { get; internal set; }
// Directory the model is installed in (get-only form of the declaration).
public string InstallDir { get; }
// Same property with a public setter.
// NOTE(review): this excerpt lists both InstallDir declarations; only one of them can exist in the class.
public string InstallDir { get; set; }
/// <summary>
/// Prioritization flag of the model. The setter stores the value and then calls
/// <c>NotifyPropertyChanged()</c> without an explicit property name.
/// </summary>
public bool Prioritized
{
    get { return _prioritized; }
    set
    {
        _prioritized = value;
        NotifyPropertyChanged();
    }
}

// NOTE(review): presumably holds the log of a Marian training run for this model —
// confirm against the code that assigns it (not visible in this excerpt).
private MarianLog TrainingLog;
Expand Down
6 changes: 6 additions & 0 deletions OpusCatMTEngine/MTModelConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ private void NotifyPropertyChanged([CallerMemberName] string propertyName = "")
// Strings stored under the "auto-post-edit-rule-collection-guids" YAML key;
// presumably the GUIDs of the auto post-edit rule collections attached to the model (per the member name).
[YamlMember(Alias = "auto-post-edit-rule-collection-guids", ApplyNamingConventions = false)]
public ObservableCollection<string> AutoPostEditRuleCollectionGuids { get; internal set; }

// Source language codes stored under the "source-languages" YAML key.
// MTModel.SaveModelConfig fills this from the model's source languages, and the MTModel
// constructor reads it back to rebuild the language list when no model yaml is available.
[YamlMember(Alias = "source-languages", ApplyNamingConventions = false)]
public ObservableCollection<string> SourceLanguageCodes { get; internal set; }

// Target language codes stored under the "target-languages" YAML key;
// written and read alongside SourceLanguageCodes.
[YamlMember(Alias = "target-languages", ApplyNamingConventions = false)]
public ObservableCollection<string> TargetLanguageCodes { get; internal set; }

// Backing field; the member that exposes it lies outside this excerpt.
// NOTE(review): presumably tracks whether fine-tuning has finished — confirm.
private bool finetuningComplete;

public MTModelConfig()
Expand Down
16 changes: 9 additions & 7 deletions OpusCatMTEngine/ModelManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ private void TatoebaModelListDownloaded(object sender, DownloadStringCompletedEv
//Use distinct to remove duplicate entries
foreach (var line in modelList.Split('\n').Distinct())
{
var split = line.Split('\t');
var split = line.Split(new char[] { '\t' });
if (split.Length >= 4)
{
var modelPath = split[0];
Expand All @@ -529,17 +529,17 @@ private void TatoebaModelListDownloaded(object sender, DownloadStringCompletedEv
IEnumerable<string> sourceLangs = split[2].Split(',');
IEnumerable<string> targetLangs;

//Multilingual models have a use-target-labels field, which contains the target
//Multilingual models have a use-target-labels field (fifth column in tsv), which contains the target
//labels of the model, use that instead of target languages, as it will include
//script info (e.g. Latn or Cyrl).
if (split.Length > 4)
if (String.IsNullOrWhiteSpace(split[4]))
{
//Remove the target code delimiters >>code<< -> code
targetLangs = split[4].Replace("<","").Replace(">","").Split(',');
targetLangs = split[3].Split(',');
}
else
{
targetLangs = split[3].Split(',');
//Remove the target code delimiters >>code<< -> code
targetLangs = split[4].Replace("<", "").Replace(">", "").Split(',');
}

//Some entries might have empty source and target languages
Expand All @@ -549,8 +549,10 @@ private void TatoebaModelListDownloaded(object sender, DownloadStringCompletedEv
}
var model = new MTModel(modelPath.Replace(".zip", ""), modelUri);
model.ModelType = modelType;
model.SourceLanguages = sourceLangs.Select(x => new IsoLanguage(x)).ToList();

model.TargetLanguages = targetLangs.Select(x => new IsoLanguage(x)).ToList();
model.SourceLanguages = sourceLangs.Select(x => new IsoLanguage(x)).ToList();

this.onlineModels.Add(model);
}
}
Expand Down
Loading

0 comments on commit dbcb08f

Please sign in to comment.